qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
To: qemu-devel@nongnu.org
Cc: alex.williamson@redhat.com, clg@redhat.com, jgg@nvidia.com,
	nicolinc@nvidia.com, joao.m.martins@oracle.com,
	eric.auger@redhat.com, peterx@redhat.com, jasowang@redhat.com,
	kevin.tian@intel.com, yi.l.liu@intel.com, yi.y.sun@intel.com,
	chao.p.peng@intel.com, Zhenzhong Duan <zhenzhong.duan@intel.com>
Subject: [PATCH v2 21/27] vfio/pci: Adapt vfio pci hot reset support with iommufd BE
Date: Mon, 16 Oct 2023 16:32:17 +0800	[thread overview]
Message-ID: <20231016083223.1519410-22-zhenzhong.duan@intel.com> (raw)
In-Reply-To: <20231016083223.1519410-1-zhenzhong.duan@intel.com>

The PCI hot reset path needs to reference PCI-specific functions
and data structures, so adding container-level callback functions
for the legacy and iommufd backends (BEs) that reference those
PCI-specific functions and data is no better than implementing
reset support for the iommufd BE directly in pci.c.

This way we can also share the common bus reset and system reset
path for both BEs.

A helper function, vfio_pci_get_pci_hot_reset_info(), is extracted
for use by both BEs.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/vfio/pci.c        | 212 +++++++++++++++++++++++++++++++++++++++----
 hw/vfio/trace-events |   1 +
 2 files changed, 196 insertions(+), 17 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index b27011cee7..24fc047423 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -42,6 +42,7 @@
 #include "qapi/error.h"
 #include "migration/blocker.h"
 #include "migration/qemu-file.h"
+#include "linux/iommufd.h"
 
 #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
 
@@ -2445,22 +2446,13 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name)
     return (strcmp(tmp, name) == 0);
 }
 
-static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
+static int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev,
+                                       struct vfio_pci_hot_reset_info **info_p)
 {
-    VFIOGroup *group;
     struct vfio_pci_hot_reset_info *info;
-    struct vfio_pci_dependent_device *devices;
-    struct vfio_pci_hot_reset *reset;
-    int32_t *fds;
-    int ret, i, count;
-    bool multi = false;
+    int ret, count;
 
-    trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
-
-    if (!single) {
-        vfio_pci_pre_reset(vdev);
-    }
-    vdev->vbasedev.needs_reset = false;
+    assert(info_p && !*info_p);
 
     info = g_malloc0(sizeof(*info));
     info->argsz = sizeof(*info);
@@ -2468,24 +2460,53 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
     ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
     if (ret && errno != ENOSPC) {
         ret = -errno;
+        g_free(info);
         if (!vdev->has_pm_reset) {
             error_report("vfio: Cannot reset device %s, "
                          "no available reset mechanism.", vdev->vbasedev.name);
         }
-        goto out_single;
+        return ret;
     }
 
     count = info->count;
-    info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices)));
-    info->argsz = sizeof(*info) + (count * sizeof(*devices));
-    devices = &info->devices[0];
+    info = g_realloc(info, sizeof(*info) + (count * sizeof(info->devices[0])));
+    info->argsz = sizeof(*info) + (count * sizeof(info->devices[0]));
 
     ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
     if (ret) {
         ret = -errno;
+        g_free(info);
         error_report("vfio: hot reset info failed: %m");
+        return ret;
+    }
+
+    *info_p = info;
+    return 0;
+}
+
+static int vfio_pci_hot_reset_legacy(VFIOPCIDevice *vdev, bool single)
+{
+    VFIOGroup *group;
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    struct vfio_pci_hot_reset *reset;
+    int32_t *fds;
+    int ret, i, count;
+    bool multi = false;
+
+    trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
+
+    if (!single) {
+        vfio_pci_pre_reset(vdev);
+    }
+    vdev->vbasedev.needs_reset = false;
+
+    ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
+
+    if (ret) {
         goto out_single;
     }
+    devices = &info->devices[0];
 
     trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
 
@@ -2627,6 +2648,163 @@ out_single:
     return ret;
 }
 
+#ifdef CONFIG_IOMMUFD
+static VFIODevice *vfio_pci_find_by_iommufd_devid(__u32 devid)
+{
+    VFIODevice *vbasedev_iter;
+    VFIOIOMMUBackendOpsClass *ops = VFIO_IOMMU_BACKEND_OPS_CLASS(
+        object_class_by_name(TYPE_VFIO_IOMMU_BACKEND_IOMMUFD_OPS));
+
+    QLIST_FOREACH(vbasedev_iter, &vfio_device_list, global_next) {
+        if (vbasedev_iter->bcontainer->ops != ops) {
+            continue;
+        }
+        if (devid == vbasedev_iter->devid) {
+            return vbasedev_iter;
+        }
+    }
+    return NULL;
+}
+
+static int vfio_pci_hot_reset_iommufd(VFIOPCIDevice *vdev, bool single)
+{
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    struct vfio_pci_hot_reset *reset;
+    int ret, i;
+    bool multi = false;
+
+    trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
+
+    if (!single) {
+        vfio_pci_pre_reset(vdev);
+    }
+    vdev->vbasedev.needs_reset = false;
+
+    ret = vfio_pci_get_pci_hot_reset_info(vdev, &info);
+
+    if (ret) {
+        goto out_single;
+    }
+
+    assert(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID);
+
+    devices = &info->devices[0];
+
+    if (!(info->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED)) {
+        if (!vdev->has_pm_reset) {
+            for (i = 0; i < info->count; i++) {
+                if (devices[i].devid == VFIO_PCI_DEVID_NOT_OWNED) {
+                    error_report("vfio: Cannot reset device %s, "
+                                 "depends on device %04x:%02x:%02x.%x "
+                                 "which is not owned.",
+                                 vdev->vbasedev.name, devices[i].segment,
+                                 devices[i].bus, PCI_SLOT(devices[i].devfn),
+                                 PCI_FUNC(devices[i].devfn));
+                }
+            }
+        }
+        ret = -EPERM;
+        goto out_single;
+    }
+
+    trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
+
+    for (i = 0; i < info->count; i++) {
+        VFIOPCIDevice *tmp;
+        VFIODevice *vbasedev_iter;
+
+        trace_vfio_pci_hot_reset_dep_devices_iommufd(devices[i].segment,
+                                             devices[i].bus,
+                                             PCI_SLOT(devices[i].devfn),
+                                             PCI_FUNC(devices[i].devfn),
+                                             devices[i].devid);
+
+        /*
+         * If a VFIO cdev device is resettable, all the dependent devices
+         * are either bound to same iommufd or within same iommu_groups as
+         * one of the iommufd bound devices.
+         */
+        assert(devices[i].devid != VFIO_PCI_DEVID_NOT_OWNED);
+
+        if (devices[i].devid == vdev->vbasedev.devid ||
+            devices[i].devid == VFIO_PCI_DEVID_OWNED) {
+            continue;
+        }
+
+        vbasedev_iter = vfio_pci_find_by_iommufd_devid(devices[i].devid);
+        if (!vbasedev_iter || !vbasedev_iter->dev->realized ||
+            vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+            continue;
+        }
+        tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+        if (single) {
+            ret = -EINVAL;
+            goto out_single;
+        }
+        vfio_pci_pre_reset(tmp);
+        tmp->vbasedev.needs_reset = false;
+        multi = true;
+    }
+
+    if (!single && !multi) {
+        ret = -EINVAL;
+        goto out_single;
+    }
+
+    /* Use zero length array for hot reset with iommufd backend */
+    reset = g_malloc0(sizeof(*reset));
+    reset->argsz = sizeof(*reset);
+
+     /* Bus reset! */
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+    g_free(reset);
+
+    trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
+                                    ret ? strerror(errno) : "Success");
+
+    /* Re-enable INTx on affected devices */
+    for (i = 0; i < info->count; i++) {
+        VFIOPCIDevice *tmp;
+        VFIODevice *vbasedev_iter;
+
+        if (devices[i].devid == vdev->vbasedev.devid ||
+            devices[i].devid == VFIO_PCI_DEVID_OWNED) {
+            continue;
+        }
+
+        vbasedev_iter = vfio_pci_find_by_iommufd_devid(devices[i].devid);
+        if (!vbasedev_iter || !vbasedev_iter->dev->realized ||
+            vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+            continue;
+        }
+        tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+        vfio_pci_post_reset(tmp);
+    }
+out_single:
+    if (!single) {
+        vfio_pci_post_reset(vdev);
+    }
+    g_free(info);
+
+    return ret;
+}
+#endif
+
+static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
+{
+#ifdef CONFIG_IOMMUFD
+    if (vdev->vbasedev.iommufd) {
+        return vfio_pci_hot_reset_iommufd(vdev, single);
+    } else
+#endif
+    {
+        return vfio_pci_hot_reset_legacy(vdev, single);
+    }
+}
+
+
+
 /*
  * We want to differentiate hot reset of multiple in-use devices vs hot reset
  * of a single in-use device.  VFIO_DEVICE_RESET will already handle the case
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 9b180cf77c..71c5840636 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -34,6 +34,7 @@ vfio_check_af_flr(const char *name) "%s Supports FLR via AF cap"
 vfio_pci_hot_reset(const char *name, const char *type) " (%s) %s"
 vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent devices:"
 vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d"
+vfio_pci_hot_reset_dep_devices_iommufd(int domain, int bus, int slot, int function, int dev_id) "\t%04x:%02x:%02x.%x devid %d"
 vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s"
 vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n  size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
 vfio_populate_device_get_irq_info_failure(const char *errstr) "VFIO_DEVICE_GET_IRQ_INFO failure: %s"
-- 
2.34.1



  parent reply	other threads:[~2023-10-16  8:51 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-16  8:31 [PATCH v2 00/27] vfio: Adopt iommufd Zhenzhong Duan
2023-10-16  8:31 ` [PATCH v2 01/27] vfio: Rename VFIOContainer into VFIOLegacyContainer Zhenzhong Duan
2023-10-17 15:50   ` Cédric Le Goater
2023-10-18  2:33     ` Duan, Zhenzhong
2023-10-16  8:31 ` [PATCH v2 02/27] vfio: Introduce base object for VFIOContainer and targetted interface Zhenzhong Duan
2023-10-17 15:51   ` Cédric Le Goater
2023-10-18  2:41     ` Duan, Zhenzhong
2023-10-18  8:04       ` Cédric Le Goater
2023-10-19  2:29         ` Duan, Zhenzhong
2023-10-19 12:17           ` Cédric Le Goater
2023-10-20  5:48             ` Duan, Zhenzhong
2023-10-20  8:19               ` Eric Auger
2023-10-20  8:28                 ` Duan, Zhenzhong
2023-10-23 15:28                 ` Cédric Le Goater
2023-10-24  6:03                   ` Duan, Zhenzhong
2023-10-24  6:51                     ` Cédric Le Goater
2023-10-16  8:31 ` [PATCH v2 03/27] VFIO/container: Introduce dummy VFIOContainerClass implementation Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 04/27] vfio/container: Switch to dma_map|unmap API Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 05/27] vfio/common: Move giommu_list in base container Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 06/27] vfio/container: Move space field to " Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 07/27] vfio/container: switch to IOMMU BE add/del_section_window Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 08/27] vfio/container: Move hostwin_list in base container Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 09/27] vfio/container: Switch to IOMMU BE set_dirty_page_tracking/query_dirty_bitmap API Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 10/27] vfio/container: Move per container device list in base container Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 11/27] vfio/container: Convert functions to " Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 12/27] vfio/container: Move vrdl_list, pgsizes and dma_max_mappings " Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 13/27] vfio/container: Move listener " Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 14/27] vfio/container: Move dirty_pgsizes and max_dirty_bitmap_size " Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 15/27] vfio/container: Implement attach/detach_device Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 16/27] Add iommufd configure option Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 17/27] backends/iommufd: Introduce the iommufd object Zhenzhong Duan
2023-10-16 10:00   ` Markus Armbruster
2023-10-17  8:27     ` Duan, Zhenzhong
2023-10-16  8:32 ` [PATCH v2 18/27] util/char_dev: Add open_cdev() Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 19/27] vfio/iommufd: Implement the iommufd backend Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 20/27] vfio/container: Bypass EEH if " Zhenzhong Duan
2023-10-16  8:32 ` Zhenzhong Duan [this message]
2023-10-16  8:32 ` [PATCH v2 22/27] vfio/pci: Allow the selection of a given iommu backend Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 23/27] vfio/pci: Make vfio cdev pre-openable by passing a file handle Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 24/27] vfio: Allow the selection of a given iommu backend for platform ap and ccw Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 25/27] vfio/platform: Make vfio cdev pre-openable by passing a file handle Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 26/27] vfio/ap: " Zhenzhong Duan
2023-10-16  8:32 ` [PATCH v2 27/27] vfio/ccw: " Zhenzhong Duan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231016083223.1519410-22-zhenzhong.duan@intel.com \
    --to=zhenzhong.duan@intel.com \
    --cc=alex.williamson@redhat.com \
    --cc=chao.p.peng@intel.com \
    --cc=clg@redhat.com \
    --cc=eric.auger@redhat.com \
    --cc=jasowang@redhat.com \
    --cc=jgg@nvidia.com \
    --cc=joao.m.martins@oracle.com \
    --cc=kevin.tian@intel.com \
    --cc=nicolinc@nvidia.com \
    --cc=peterx@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=yi.l.liu@intel.com \
    --cc=yi.y.sun@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).