qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
To: "Aviv B.D." <bd.aviv@gmail.com>,
	qemu-devel@nongnu.org, "Michael S. Tsirkin" <mst@redhat.com>,
	Jan Kiszka <jan.kiszka@siemens.com>,
	Alex Williamson <alex.williamson@redhat.com>,
	Peter Xu <peterx@redhat.com>
Subject: Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present
Date: Mon, 14 Mar 2016 20:52:33 +0200	[thread overview]
Message-ID: <56E70871.3050305@gmail.com> (raw)
In-Reply-To: <CAM3WwMgvSW7c74An0zGk+DbVmPmDpb5Q0ghjLC09H6dXXxaRoQ@mail.gmail.com>

On 03/12/2016 06:13 PM, Aviv B.D. wrote:
> From: "Aviv B.D." <bd.aviv@gmail.com <mailto:bd.aviv@gmail.com>>
>
>   * Fix a bug that prevents QEMU from starting up when a vIOMMU and a VFIO
>      device are present.
>   * Advertise Cache Mode capability in iommu cap register.
>   * Register every VFIO device with IOMMU state.
>   * On page cache invalidation in the vIOMMU, check whether the domain belongs
>     to a VFIO device and mirror the guest's requests to the host.
>
> Not working (Yet!):
>   * Tested only with a network interface card (ixgbevf) and
>      intel_iommu=strict on the guest's kernel command line.
>   * Locks up under high load.
>   * Errors on guest poweroff.
>   * High relative latency compared to VFIO without an IOMMU.

Adding (possibly) interested developers to the thread.

Thanks,
Marcel

>
> Signed-off-by: Aviv B.D. <bd.aviv@gmail.com <mailto:bd.aviv@gmail.com>>
> ---
>   hw/i386/intel_iommu.c          | 76 ++++++++++++++++++++++++++++++++++++++----
>   hw/i386/intel_iommu_internal.h |  1 +
>   hw/vfio/common.c               | 12 +++++--
>   include/hw/i386/intel_iommu.h  |  4 +++
>   include/hw/vfio/vfio-common.h  |  1 +
>   5 files changed, 85 insertions(+), 9 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 347718f..046688f 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
>   #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
>   #endif
> +static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
> +                                    uint8_t devfn, VTDContextEntry *ce);
> +
>   static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
>                               uint64_t wmask, uint64_t w1cmask)
>   {
> @@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
>       return new_val;
>   }
> +static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num, uint8_t devfn)
> +{
> +    VTDContextEntry ce;
> +    int ret_fr;
> +
> +    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
> +    if (ret_fr){
> +        return -1;
> +    }
> +
> +    return VTD_CONTEXT_ENTRY_DID(ce.hi);
> +}
> +
>   static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
>                                           uint64_t clear, uint64_t mask)
>   {
> @@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
>       }
>       if (!vtd_context_entry_present(ce)) {
> -        VTD_DPRINTF(GENERAL,
> +        /*VTD_DPRINTF(GENERAL,
>                       "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
> -                    "is not present", devfn, bus_num);
> +                    "is not present", devfn, bus_num);*/
>           return -VTD_FR_CONTEXT_ENTRY_P;
>       } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
>                  (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
> @@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
>                                         hwaddr addr, uint8_t am)
>   {
>       VTDIOTLBPageInvInfo info;
> +    VFIOGuestIOMMU * giommu;
> +    bool flag = false;
>       assert(am <= VTD_MAMV);
>       info.domain_id = domain_id;
>       info.addr = addr;
>       info.mask = ~((1 << am) - 1);
> +
> +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> +        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num(vtd_as->bus), vtd_as->devfn);
> +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> +        if (vfio_domain_id != (uint16_t)-1 &&
> +                domain_id == vfio_domain_id){
> +            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s, vfio_source_id, addr);
> +            if (iotlb_entry != NULL){
> +                IOMMUTLBEntry entry;
> +                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask %d", addr, am);
> +                entry.iova = addr & VTD_PAGE_MASK_4K;
> +                entry.translated_addr = vtd_get_slpte_addr(iotlb_entry->slpte) & VTD_PAGE_MASK_4K;
> +                entry.addr_mask = ~VTD_PAGE_MASK_4K;
> +                entry.perm = IOMMU_NONE;
> +                memory_region_notify_iommu(giommu->iommu, entry);
> +                flag = true;
> +
> +            }
> +        }
> +
> +    }
> +
>       g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
> -}
> +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
> +        VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num(vtd_as->bus), vtd_as->devfn);
> +        if (vfio_domain_id != (uint16_t)-1 &&
> +                domain_id == vfio_domain_id && !flag){
> +            /* do vfio map */
> +            VTD_DPRINTF(GENERAL, "add addr 0x%"PRIx64 " mask %d", addr, am);
> +            /* call to vtd_iommu_translate */
> +            IOMMUTLBEntry entry = s->iommu_ops.translate(giommu->iommu, addr, 0);
> +            entry.perm = IOMMU_RW;
> +            memory_region_notify_iommu(giommu->iommu, entry);
> +            //g_vfio_iommu->n.notify(&g_vfio_iommu->n, &entry);
> +        }
> +    }
> +}
>   /* Flush IOTLB
>    * Returns the IOTLB Actual Invalidation Granularity.
>    * @val: the content of the IOTLB_REG
> @@ -1895,6 +1951,13 @@ static Property vtd_properties[] = {
>       DEFINE_PROP_END_OF_LIST(),
>   };
> +void vtd_register_giommu(VFIOGuestIOMMU * giommu)
> +{
> +    VTDAddressSpace *vtd_as = container_of(giommu->iommu, VTDAddressSpace, iommu);
> +    IntelIOMMUState *s = vtd_as->iommu_state;
> +
> +    QLIST_INSERT_HEAD(&s->giommu_list, giommu, iommu_next);
> +}
>   VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
>   {
> @@ -1949,7 +2012,8 @@ static void vtd_init(IntelIOMMUState *s)
>       s->iq_last_desc_type = VTD_INV_DESC_NONE;
>       s->next_frcd_reg = 0;
>       s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
> -             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
> +             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
> +             VTD_CAP_CM;
>       s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
>       vtd_reset_context_cache(s);
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index e5f514c..ae40f73 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -190,6 +190,7 @@
>   #define VTD_CAP_MAMV                (VTD_MAMV << 48)
>   #define VTD_CAP_PSI                 (1ULL << 39)
>   #define VTD_CAP_SLLPS               ((1ULL << 34) | (1ULL << 35))
> +#define VTD_CAP_CM                  (1ULL << 7)
>   /* Supported Adjusted Guest Address Widths */
>   #define VTD_CAP_SAGAW_SHIFT         8
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 607ec70..98c8d67 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -32,6 +32,9 @@
>   #include "sysemu/kvm.h"
>   #include "trace.h"
> +#include "hw/sysbus.h"
> +#include "hw/i386/intel_iommu.h"
> +
>   struct vfio_group_head vfio_group_list =
>       QLIST_HEAD_INITIALIZER(vfio_group_list);
>   struct vfio_as_head vfio_address_spaces =
> @@ -312,12 +315,12 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
>   out:
>       rcu_read_unlock();
>   }
> -
> +#if 0
>   static hwaddr vfio_container_granularity(VFIOContainer *container)
>   {
>       return (hwaddr)1 << ctz64(container->iova_pgsizes);
>   }
> -
> +#endif
>   static void vfio_listener_region_add(MemoryListener *listener,
>                                        MemoryRegionSection *section)
>   {
> @@ -344,6 +347,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
>       iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>       llend = int128_make64(section->offset_within_address_space);
>       llend = int128_add(llend, section->size);
> +    llend = int128_add(llend, int128_exts64(-1));
>       llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
>       if (int128_ge(int128_make64(iova), llend)) {
> @@ -381,11 +385,13 @@ static void vfio_listener_region_add(MemoryListener *listener,
>           giommu->n.notify = vfio_iommu_map_notify;
>           QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> +        vtd_register_giommu(giommu);
>           memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> +#if 0
>           memory_region_iommu_replay(giommu->iommu, &giommu->n,
>                                      vfio_container_granularity(container),
>                                      false);
> -
> +#endif
>           return;
>       }
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index b024ffa..22f3f83 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -23,6 +23,7 @@
>   #define INTEL_IOMMU_H
>   #include "hw/qdev.h"
>   #include "sysemu/dma.h"
> +#include "hw/vfio/vfio-common.h"
>   #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
>   #define INTEL_IOMMU_DEVICE(obj) \
> @@ -123,6 +124,8 @@ struct IntelIOMMUState {
>       MemoryRegionIOMMUOps iommu_ops;
>       GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */
>       VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
> +
> +    QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
>   };
>   /* Find the VTD Address space associated with the given bus pointer,
> @@ -130,4 +133,5 @@ struct IntelIOMMUState {
>    */
>   VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn);
> +void vtd_register_giommu(VFIOGuestIOMMU * giommu);
>   #endif
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index f037f3c..9225ba3 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -82,6 +82,7 @@ typedef struct VFIOGuestIOMMU {
>       MemoryRegion *iommu;
>       Notifier n;
>       QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
> +    QLIST_ENTRY(VFIOGuestIOMMU) iommu_next;
>   } VFIOGuestIOMMU;
>   typedef struct VFIODeviceOps VFIODeviceOps;
> --
> 1.9.1

  reply	other threads:[~2016-03-14 18:52 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-12 16:13 [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present Aviv B.D.
2016-03-14 18:52 ` Marcel Apfelbaum [this message]
2016-03-14 18:58   ` Jan Kiszka
2016-03-15  7:00     ` Michael S. Tsirkin
2016-03-15  8:52   ` Peter Xu
2016-03-17 11:17     ` Aviv B.D.
2016-03-18  3:06       ` Peter Xu
2016-03-19  9:40         ` Aviv B.D.
2016-03-21  2:30           ` Peter Xu
2016-03-22  8:13             ` Aviv B.D.
2016-03-15 10:53 ` Michael S. Tsirkin
2016-03-17 11:58   ` Aviv B.D.
2016-03-23 14:34     ` Michael S. Tsirkin
2016-03-23 14:33 ` Michael S. Tsirkin
2016-03-26 14:47   ` Aviv B.D.

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56E70871.3050305@gmail.com \
    --to=marcel.apfelbaum@gmail.com \
    --cc=alex.williamson@redhat.com \
    --cc=bd.aviv@gmail.com \
    --cc=jan.kiszka@siemens.com \
    --cc=marcel@redhat.com \
    --cc=mst@redhat.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).