From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:54225) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ajEGN-0006Fy-9y for qemu-devel@nongnu.org; Thu, 24 Mar 2016 19:02:00 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ajEGK-0003p6-03 for qemu-devel@nongnu.org; Thu, 24 Mar 2016 19:01:59 -0400 Received: from mx1.redhat.com ([209.132.183.28]:60921) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ajEGJ-0003oX-O3 for qemu-devel@nongnu.org; Thu, 24 Mar 2016 19:01:55 -0400 Date: Thu, 24 Mar 2016 17:01:53 -0600 From: Alex Williamson Message-ID: <20160324170153.62e78887@t450s.home> In-Reply-To: <1458727702-15012-7-git-send-email-caoj.fnst@cn.fujitsu.com> References: <1458727702-15012-1-git-send-email-caoj.fnst@cn.fujitsu.com> <1458727702-15012-7-git-send-email-caoj.fnst@cn.fujitsu.com> MIME-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: 7bit Subject: Re: [Qemu-devel] [patch v5 06/12] vfio: add check host bus reset is support or not List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Cao jin Cc: chen.fan.fnst@cn.fujitsu.com, izumi.taku@jp.fujitsu.com, qemu-devel@nongnu.org, mst@redhat.com [resending, first copy had the list address incorrect, apologies to those on the cc list] On Wed, 23 Mar 2016 18:08:16 +0800 Cao jin wrote: > From: Chen Fan > > When assigning a vfio device with AER enabled, we must check whether > the device supports a host bus reset (ie. hot reset) as this may be > used by the guest OS in order to recover the device from an AER > error. QEMU must therefore have the ability to perform a physical > host bus reset using the existing vfio APIs in response to a virtual > bus reset in the VM. 
A physical bus reset affects all of the devices > on the host bus, therefore we place a few simplifying configuration > restriction on the VM: > > - All physical devices affected by a bus reset must be assigned to > the VM with AER enabled on each and be configured on the same > virtual bus in the VM. > > - No devices unaffected by the bus reset, be they physical, emulated, > or paravirtual may be configured on the same virtual bus as a > device supporting AER signaling through vfio. > > In other words users wishing to enable AER on a multifunction device > need to assign all functions of the device to the same virtual bus > and enable AER support for each device. The easiest way to > accomplish this is to identity map the physical functions to virtual > functions with multifunction enabled on the virtual device. > > Signed-off-by: Chen Fan > --- > hw/vfio/pci.c | 276 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- > hw/vfio/pci.h | 1 + > 2 files changed, 254 insertions(+), 23 deletions(-) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 5b23a86..939b764 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -1716,6 +1716,41 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, uint8_t pos) > } > } > > +static int vfio_pci_name_to_addr(const char *name, PCIHostDeviceAddress *addr) > +{ > + if (strlen(name) != 12 || > + sscanf(name, "%04x:%02x:%02x.%1x", &addr->domain, > + &addr->bus, &addr->slot, &addr->function) != 4) { > + return -EINVAL; > + } > + > + return 0; > +} > + > +static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) > +{ > + PCIHostDeviceAddress tmp; > + > + if (vfio_pci_name_to_addr(name, &tmp)) { > + return false; > + } > + > + return (tmp.domain == addr->domain && tmp.bus == addr->bus && > + tmp.slot == addr->slot && tmp.function == addr->function); > +} > + > +static bool vfio_pci_host_match_slot(PCIHostDeviceAddress *addr, const char *name) > +{ > + PCIHostDeviceAddress tmp; > + > + if 
(vfio_pci_name_to_addr(name, &tmp)) { > + return false; > + } > + > + return (tmp.domain == addr->domain && tmp.bus == addr->bus && > + tmp.slot == addr->slot); > +} > + > /* > * return negative with errno, return 0 on success. > * if success, the point of ret_info fill with the affected device reset info. > @@ -1877,6 +1912,199 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) > return 0; > } > > +static int vfio_device_range_limit(PCIBus *bus) > +{ > + PCIDevice *br; > + > + br = pci_bridge_get_device(bus); > + if (!br || > + !pci_is_express(br) || > + !(br->exp.exp_cap) || > + pcie_cap_is_arifwd_enabled(br)) { > + return 255; > + } Perhaps something like vfio_pci_bus_devfn_limit() might be a better name. This probably also deserves a comment. If I understand correctly, you're trying to only return 8 for an express bus without ARI enabled. It seems like it would be easier to do that with the function pci_bus_is_express(). I think that would reduce to: if (!br || !pci_bus_is_express(bus) || pcie_cap_is_arifwd_enabled(br)) { return 255; } ... 
> + > + return 8; > +} > + > +static void vfio_check_hot_bus_reset(VFIOPCIDevice *vdev, Error **errp) > +{ > + PCIBus *bus = vdev->pdev.bus; > + struct vfio_pci_hot_reset_info *info = NULL; > + struct vfio_pci_dependent_device *devices; > + VFIOGroup *group; > + int ret, i, devfn, range_limit; > + > + ret = vfio_get_hot_reset_info(vdev, &info); > + if (ret) { > + error_setg(errp, "vfio: Cannot enable AER for device %s," > + " device does not support hot reset.", > + vdev->vbasedev.name); > + return; > + } > + > + /* List all affected devices by bus reset */ > + devices = &info->devices[0]; > + > + /* Verify that we have all the groups required */ > + for (i = 0; i < info->count; i++) { > + PCIHostDeviceAddress host; > + VFIOPCIDevice *tmp; > + VFIODevice *vbasedev_iter; > + bool found = false; > + > + host.domain = devices[i].segment; > + host.bus = devices[i].bus; > + host.slot = PCI_SLOT(devices[i].devfn); > + host.function = PCI_FUNC(devices[i].devfn); > + > + /* Skip the current device */ > + if (vfio_pci_host_match(&host, vdev->vbasedev.name)) { > + continue; > + } > + > + /* Ensure we own the group of the affected device */ > + QLIST_FOREACH(group, &vfio_group_list, next) { > + if (group->groupid == devices[i].group_id) { > + break; > + } > + } > + > + if (!group) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, " > + "depends on group %d which is not owned.", > + vdev->vbasedev.name, devices[i].group_id); > + goto out; > + } > + > + /* Ensure affected devices for reset on the same bus */ > + QLIST_FOREACH(vbasedev_iter, &group->device_list, next) { > + if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) { > + continue; > + } > + tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev); > + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { > + /* > + * AER errors may be broadcast to all functions of a multi- > + * function endpoint. 
If any of those sibling functions are > + * also assigned, they need to have AER enabled or else an > + * error may continue to cause a vm_stop condition. IOW, > + * AER setup of this function would be pointless. > + */ > + if (vfio_pci_host_match_slot(&host, vdev->vbasedev.name) && > + !(tmp->features & VFIO_FEATURE_ENABLE_AER)) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, on same slot" > + " the dependent device %s which does not enable AER.", > + vdev->vbasedev.name, tmp->vbasedev.name); > + goto out; > + } > + > + if (tmp->pdev.bus != bus) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, " > + "the dependent device %s is not on the same bus", > + vdev->vbasedev.name, tmp->vbasedev.name); > + goto out; > + } > + found = true; > + break; > + } > + } > + > + /* Ensure all affected devices assigned to VM */ > + if (!found) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, " > + "the dependent device %04x:%02x:%02x.%x " > + "is not assigned to VM.", > + vdev->vbasedev.name, host.domain, host.bus, > + host.slot, host.function); > + goto out; > + } > + } > + > + /* > + * The above code verified that all devices affected by a bus reset > + * exist on the same bus in the VM. To further simplify, we also > + * require that there are no additional devices beyond those existing on > + * the VM bus. > + */ > + range_limit = vfio_device_range_limit(bus); > + for (devfn = 0; devfn < range_limit; devfn++) { > + VFIOPCIDevice *tmp; > + PCIDevice *dev; > + bool found = false; > + > + dev = pci_find_device(bus, pci_bus_num(bus), > + PCI_DEVFN(PCI_SLOT(vdev->pdev.devfn), devfn)); This is incorrect: the loop now enumerates through all devfns on the bus, so devfn should be passed to pci_find_device() directly; we don't need PCI_DEVFN() here.
> + > + if (!dev) { > + continue; > + } > + > + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, device" > + " %s: slot %d function%d cannot be configured" Can we say "VM address %02x.%d" rather than this slot/function thing? > + " on the same virtual bus", > + vdev->vbasedev.name, dev->name, > + PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); > + goto out; > + } > + > + tmp = DO_UPCAST(VFIOPCIDevice, pdev, dev); > + for (i = 0; i < info->count; i++) { > + PCIHostDeviceAddress host; > + > + host.domain = devices[i].segment; > + host.bus = devices[i].bus; > + host.slot = PCI_SLOT(devices[i].devfn); > + host.function = PCI_FUNC(devices[i].devfn); > + > + if (vfio_pci_host_match(&host, tmp->vbasedev.name)) { > + found = true; > + break; > + } > + } > + > + if (!found) { > + error_setg(errp, "vfio: Cannot enable AER for device %s, affected" > + " device %s does not be configured on the same" > + " virtual bus", And use the same sort of comment here as above: "vfio: Cannot enable AER for device %s, vfio-pci device at VM address %02x.%d cannot be configured on the same virtual bus" > + vdev->vbasedev.name, tmp->vbasedev.name); > + goto out; > + } > + } > + > +out: > + g_free(info); > + return; > +} > + > +static void vfio_aer_check_host_bus_reset(Error **errp) > +{ > + VFIOGroup *group; > + VFIODevice *vbasedev; > + VFIOPCIDevice *vdev; > + Error *local_err = NULL; > + > + /* Check All vfio-pci devices if have bus reset capability */ > + QLIST_FOREACH(group, &vfio_group_list, next) { > + QLIST_FOREACH(vbasedev, &group->device_list, next) { > + if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) { > + continue; > + } > + vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > + if (vdev->features & VFIO_FEATURE_ENABLE_AER) { > + vfio_check_hot_bus_reset(vdev, &local_err); > + if (local_err) { > + error_propagate(errp, local_err); > + return; > + } > + } > + } > + } > + > + return; > +} > + > static int 
vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver, > int pos, uint16_t size) > { > @@ -2060,29 +2288,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) > vfio_intx_enable(vdev); > } > > -static int vfio_pci_name_to_addr(const char *name, PCIHostDeviceAddress *addr) > -{ > - if (strlen(name) != 12 || > - sscanf(name, "%04x:%02x:%02x.%1x", &addr->domain, > - &addr->bus, &addr->slot, &addr->function) != 4) { > - return -EINVAL; > - } > - > - return 0; > -} > - > -static bool vfio_pci_host_match(PCIHostDeviceAddress *addr, const char *name) > -{ > - PCIHostDeviceAddress tmp; > - > - if (vfio_pci_name_to_addr(name, &tmp)) { > - return false; > - } > - > - return (tmp.domain == addr->domain && tmp.bus == addr->bus && > - tmp.slot == addr->slot && tmp.function == addr->function); > -} > - > static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) > { > VFIOGroup *group; > @@ -2589,6 +2794,22 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) > vdev->req_enabled = false; > } > > +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused) > +{ > + Error *local_err = NULL; > + > + vfio_aer_check_host_bus_reset(&local_err); > + if (local_err) { > + fprintf(stderr, "%s\n", error_get_pretty(local_err)); > + error_free(local_err); > + exit(1); > + } > +} > + > +static Notifier machine_notifier = { > + .notify = vfio_pci_machine_done_notify, > +}; > + > static int vfio_initfn(PCIDevice *pdev) > { > VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); > @@ -2934,6 +3155,15 @@ static const TypeInfo vfio_pci_dev_info = { > static void register_vfio_pci_dev_type(void) > { > type_register_static(&vfio_pci_dev_info); > + > + /* > + * The AER configuration may depend on multiple devices, so we cannot > + * validate consistency after each device is initialized. 
We can only > + * depend on function initialization order (function 0 last) for hotplug > + * devices, therefore a machine-init-done notifier is used to validate > + * the configuration after all cold-plug devices are processed. > + */ > + qemu_add_machine_init_done_notifier(&machine_notifier); > } > > type_init(register_vfio_pci_dev_type) > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > index 7b3924e..db7c6d5 100644 > --- a/hw/vfio/pci.h > +++ b/hw/vfio/pci.h > @@ -15,6 +15,7 @@ > #include "qemu-common.h" > #include "exec/memory.h" > #include "hw/pci/pci.h" > +#include "hw/pci/pci_bus.h" > #include "hw/pci/pci_bridge.h" > #include "hw/vfio/vfio-common.h" > #include "qemu/event_notifier.h"