From: Liu Yi L <yi.l.liu@intel.com>
To: qemu-devel@nongnu.org, alex.williamson@redhat.com,
	peterx@redhat.com, jasowang@redhat.com
Cc: jean-philippe@linaro.org, kevin.tian@intel.com,
	yi.l.liu@intel.com, Yi Sun <yi.y.sun@linux.intel.com>,
	kvm@vger.kernel.org, mst@redhat.com, jun.j.tian@intel.com,
	eric.auger@redhat.com, yi.y.sun@intel.com,
	Jacob Pan <jacob.jun.pan@linux.intel.com>,
	pbonzini@redhat.com, Lingshan.Zhu@intel.com, hao.wu@intel.com,
	david@gibson.dropbear.id.au
Subject: [RFC v11 13/25] vfio: init HostIOMMUContext per-container
Date: Wed,  3 Mar 2021 04:38:15 +0800	[thread overview]
Message-ID: <20210302203827.437645-14-yi.l.liu@intel.com> (raw)
In-Reply-To: <20210302203827.437645-1-yi.l.liu@intel.com>

In this patch, QEMU first queries the kernel for the iommu info to check
the capabilities supported by a VFIO_TYPE1_NESTING_IOMMU type iommu, and
then initializes a HostIOMMUContext instance for the container.

For vfio-pci devices, pci_device_set/unset_iommu_context() is used to
expose the host iommu context to vIOMMU emulators. vIOMMU emulators can
then make use of the methods provided by the host iommu context, e.g. to
propagate requests to the host iommu.
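
A minimal sketch of such usage, not part of this patch: a vIOMMU emulator
that received the context through pci_device_set_iommu_context() could
propagate a guest stage-1 page-table bind to the host. The helper name
(host_iommu_ctx_bind_stage1_pgtbl) and the bind-data layout are assumed
from later patches in this series and the uAPI headers it imports, so
treat every identifier below as illustrative.

    /* Illustrative only: bind a guest stage-1 page table through the
     * host iommu context exposed by the vfio container. */
    static int viommu_bind_guest_pgtbl(HostIOMMUContext *iommu_ctx,
                                       uint32_t pasid, hwaddr gpgd)
    {
        struct iommu_gpasid_bind_data bind = {
            .argsz   = sizeof(bind),
            .version = IOMMU_GPASID_BIND_VERSION_1,
            .format  = IOMMU_PASID_FORMAT_INTEL_VTD,
            .gpgd    = gpgd,    /* guest page-table base address */
            .hpasid  = pasid,   /* PASID used on the host side */
            .gpasid  = pasid,   /* PASID used by the guest */
        };

        /* Forward the bind request to the host iommu. */
        return host_iommu_ctx_bind_stage1_pgtbl(iommu_ctx, &bind);
    }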

Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Eric Auger <eric.auger@redhat.com>
Cc: Yi Sun <yi.y.sun@linux.intel.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
---
 hw/vfio/common.c              | 135 +++++++++++++++++++++++++---------
 hw/vfio/pci.c                 |  17 +++++
 include/hw/vfio/vfio-common.h |   1 +
 3 files changed, 118 insertions(+), 35 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 433938c245..a12708bcb7 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1623,41 +1623,11 @@ static int vfio_host_iommu_ctx_unbind_stage1_pgtbl(HostIOMMUContext *iommu_ctx,
     return ret;
 }
 
-static int vfio_init_container(VFIOContainer *container, int group_fd,
-                               bool want_nested, Error **errp)
-{
-    int iommu_type, ret;
-
-    iommu_type = vfio_get_iommu_type(container, want_nested, errp);
-    if (iommu_type < 0) {
-        return iommu_type;
-    }
-
-    ret = ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd);
-    if (ret) {
-        error_setg_errno(errp, errno, "Failed to set group container");
-        return -errno;
-    }
-
-    while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) {
-        if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
-            /*
-             * On sPAPR, despite the IOMMU subdriver always advertises v1 and
-             * v2, the running platform may not support v2 and there is no
-             * way to guess it until an IOMMU group gets added to the container.
-             * So in case it fails with v2, try v1 as a fallback.
-             */
-            iommu_type = VFIO_SPAPR_TCE_IOMMU;
-            continue;
-        }
-        error_setg_errno(errp, errno, "Failed to set iommu for container");
-        return -errno;
-    }
-
-    container->iommu_type = iommu_type;
-    return 0;
-}
-
+/**
+ * Get iommu info from the host. On success, the caller is responsible
+ * for freeing the memory returned in @info once it is no longer
+ * needed.
+ */
 static int vfio_get_iommu_info(VFIOContainer *container,
                                struct vfio_iommu_type1_info **info)
 {
@@ -1702,6 +1672,101 @@ vfio_get_iommu_info_cap(struct vfio_iommu_type1_info *info, uint16_t id)
     return NULL;
 }
 
+static int vfio_get_nesting_iommu_cap(VFIOContainer *container,
+                   struct vfio_iommu_type1_info_cap_nesting **cap_nesting)
+{
+    struct vfio_iommu_type1_info *info;
+    struct vfio_info_cap_header *hdr;
+    struct vfio_iommu_type1_info_cap_nesting *cap;
+    struct iommu_nesting_info *nest_info;
+    int ret;
+    uint32_t minsz, cap_size;
+
+    ret = vfio_get_iommu_info(container, &info);
+    if (ret) {
+        return ret;
+    }
+
+    hdr = vfio_get_iommu_info_cap(info,
+                        VFIO_IOMMU_TYPE1_INFO_CAP_NESTING);
+    if (!hdr) {
+        g_free(info);
+        return -EINVAL;
+    }
+
+    cap = container_of(hdr,
+                struct vfio_iommu_type1_info_cap_nesting, header);
+
+    nest_info = &cap->info;
+    minsz = offsetof(struct iommu_nesting_info, vendor);
+    if (nest_info->argsz < minsz) {
+        g_free(info);
+        return -EINVAL;
+    }
+
+    cap_size = offsetof(struct vfio_iommu_type1_info_cap_nesting, info) +
+               nest_info->argsz;
+    *cap_nesting = g_malloc0(cap_size);
+    memcpy(*cap_nesting, cap, cap_size);
+
+    g_free(info);
+    return 0;
+}
+
+static int vfio_init_container(VFIOContainer *container, int group_fd,
+                               bool want_nested, Error **errp)
+{
+    int iommu_type, ret;
+
+    iommu_type = vfio_get_iommu_type(container, want_nested, errp);
+    if (iommu_type < 0) {
+        return iommu_type;
+    }
+
+    ret = ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd);
+    if (ret) {
+        error_setg_errno(errp, errno, "Failed to set group container");
+        return -errno;
+    }
+
+    while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) {
+        if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+            /*
+             * On sPAPR, despite the IOMMU subdriver always advertises v1 and
+             * v2, the running platform may not support v2 and there is no
+             * way to guess it until an IOMMU group gets added to the container.
+             * So in case it fails with v2, try v1 as a fallback.
+             */
+            iommu_type = VFIO_SPAPR_TCE_IOMMU;
+            continue;
+        }
+        error_setg_errno(errp, errno, "Failed to set iommu for container");
+        return -errno;
+    }
+
+    if (iommu_type == VFIO_TYPE1_NESTING_IOMMU) {
+        struct vfio_iommu_type1_info_cap_nesting *nesting = NULL;
+        struct iommu_nesting_info *nest_info;
+
+        ret = vfio_get_nesting_iommu_cap(container, &nesting);
+        if (ret) {
+            error_setg_errno(errp, -ret,
+                             "Failed to get nesting iommu cap");
+            return ret;
+        }
+
+        nest_info = (struct iommu_nesting_info *) &nesting->info;
+        host_iommu_ctx_init(&container->iommu_ctx,
+                            sizeof(container->iommu_ctx),
+                            TYPE_VFIO_HOST_IOMMU_CONTEXT,
+                            nest_info);
+        g_free(nesting);
+    }
+
+    container->iommu_type = iommu_type;
+    return 0;
+}
+
 static void vfio_get_iommu_info_migration(VFIOContainer *container,
                                          struct vfio_iommu_type1_info *info)
 {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 437f51338e..f5363589b6 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2764,6 +2764,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
     VFIOPCIDevice *vdev = VFIO_PCI(pdev);
     VFIODevice *vbasedev_iter;
     VFIOGroup *group;
+    VFIOContainer *container;
     char *tmp, *subsys, group_path[PATH_MAX], *group_name;
     Error *err = NULL;
     ssize_t len;
@@ -2829,6 +2830,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
         goto error;
     }
 
+    container = group->container;
+    if (container->iommu_ctx.initialized &&
+        pci_device_set_iommu_context(pdev, &container->iommu_ctx)) {
+        error_setg(errp, "device attachment is denied by vIOMMU, "
+                   "please check host IOMMU nesting capability");
+        vfio_put_group(group);
+        goto error;
+    }
+
     QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
         if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
             error_setg(errp, "device is already attached");
@@ -3112,9 +3122,16 @@ static void vfio_instance_finalize(Object *obj)
 static void vfio_exitfn(PCIDevice *pdev)
 {
     VFIOPCIDevice *vdev = VFIO_PCI(pdev);
+    VFIOContainer *container;
 
     vfio_unregister_req_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
+
+    container = vdev->vbasedev.group->container;
+    if (container->iommu_ctx.initialized) {
+        pci_device_unset_iommu_context(pdev);
+    }
+
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
     if (vdev->irqchip_change_notifier.notify) {
         kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 55241ee270..5a9f2b6325 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -85,6 +85,7 @@ typedef struct VFIOContainer {
     MemoryListener listener;
     MemoryListener prereg_listener;
     unsigned iommu_type;
+    HostIOMMUContext iommu_ctx;
     Error *error;
     bool initialized;
     bool dirty_pages_supported;
-- 
2.25.1



Thread overview: 26+ messages
2021-03-02 20:38 [RFC v11 00/25] intel_iommu: expose Shared Virtual Addressing to VMs Liu Yi L
2021-03-02 20:38 ` [RFC v11 01/25] scripts/update-linux-headers: Import iommu.h Liu Yi L
2021-03-02 20:38 ` [RFC v11 02/25] scripts/update-linux-headers: Import ioasid.h Liu Yi L
2021-03-02 20:38 ` [RFC v11 03/25] header file update VFIO/IOMMU vSVA APIs kernel 5.12-rc1 Liu Yi L
2021-03-02 20:38 ` [RFC v11 04/25] hw/pci: modify pci_setup_iommu() to set PCIIOMMUOps Liu Yi L
2021-03-02 20:38 ` [RFC v11 05/25] hw/pci: introduce pci_device_get_iommu_attr() Liu Yi L
2021-03-02 20:38 ` [RFC v11 06/25] intel_iommu: add get_iommu_attr() callback Liu Yi L
2021-03-02 20:38 ` [RFC v11 07/25] vfio: pass nesting requirement into vfio_get_group() Liu Yi L
2021-03-02 20:38 ` [RFC v11 08/25] vfio: check VFIO_TYPE1_NESTING_IOMMU support Liu Yi L
2021-03-02 20:38 ` [RFC v11 09/25] hw/iommu: introduce HostIOMMUContext Liu Yi L
2021-03-02 20:38 ` [RFC v11 10/25] hw/pci: introduce pci_device_set/unset_iommu_context() Liu Yi L
2021-03-02 20:38 ` [RFC v11 11/25] intel_iommu: add set/unset_iommu_context callback Liu Yi L
2021-03-02 20:38 ` [RFC v11 12/25] vfio: add HostIOMMUContext support Liu Yi L
2021-03-02 20:38 ` [RFC v11 13/25] vfio: init HostIOMMUContext per-container Liu Yi L [this message]
2021-03-02 20:38 ` [RFC v11 14/25] intel_iommu: sync IOMMU nesting cap info for assigned devices Liu Yi L
2021-03-02 20:38 ` [RFC v11 15/25] intel_iommu: add virtual command capability support Liu Yi L
2021-03-02 20:38 ` [RFC v11 16/25] intel_iommu: process PASID cache invalidation Liu Yi L
2021-03-02 20:38 ` [RFC v11 17/25] intel_iommu: add PASID cache management infrastructure Liu Yi L
2021-03-02 20:38 ` [RFC v11 18/25] intel_iommu: bind/unbind guest page table to host Liu Yi L
2021-03-02 20:38 ` [RFC v11 19/25] intel_iommu: replay pasid binds after context cache invalidation Liu Yi L
2021-03-02 20:38 ` [RFC v11 20/25] intel_iommu: do not pass down pasid bind for PASID #0 Liu Yi L
2021-03-02 20:38 ` [RFC v11 21/25] vfio: add support for flush iommu stage-1 cache Liu Yi L
2021-03-02 20:38 ` [RFC v11 22/25] intel_iommu: process PASID-based iotlb invalidation Liu Yi L
2021-03-02 20:38 ` [RFC v11 23/25] intel_iommu: propagate PASID-based iotlb invalidation to host Liu Yi L
2021-03-02 20:38 ` [RFC v11 24/25] intel_iommu: process PASID-based Device-TLB invalidation Liu Yi L
2021-03-02 20:38 ` [RFC v11 25/25] intel_iommu: modify x-scalable-mode to be string option Liu Yi L
