From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:54407) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Ynf77-0005Ad-0d for qemu-devel@nongnu.org; Wed, 29 Apr 2015 23:26:14 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Ynf73-0000OL-Fw for qemu-devel@nongnu.org; Wed, 29 Apr 2015 23:26:12 -0400 Received: from mx1.redhat.com ([209.132.183.28]:38079) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Ynf73-0000NR-5g for qemu-devel@nongnu.org; Wed, 29 Apr 2015 23:26:09 -0400 Message-ID: <1430364066.4472.237.camel@redhat.com> From: Alex Williamson Date: Wed, 29 Apr 2015 21:21:06 -0600 In-Reply-To: <55419C6C.6020903@cn.fujitsu.com> References: <1430328750.4472.215.camel@redhat.com> <55419C6C.6020903@cn.fujitsu.com> Content-Type: text/plain; charset="UTF-8" Mime-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: Re: [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host bus reset List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Chen Fan Cc: izumi.taku@jp.fujitsu.com, qemu-devel@nongnu.org On Thu, 2015-04-30 at 11:07 +0800, Chen Fan wrote: > On 04/30/2015 01:32 AM, Alex Williamson wrote: > > On Wed, 2015-04-29 at 16:48 +0800, Chen Fan wrote: > >> add host secondary bus reset for vfio when AER occurs, if reset failed, > >> we should stop vm. > >> > >> Signed-off-by: Chen Fan > >> --- > >> hw/vfio/pci.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++----- > >> 1 file changed, 138 insertions(+), 13 deletions(-) > >> > >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > >> index 060fb47..619daed 100644 > >> --- a/hw/vfio/pci.c > >> +++ b/hw/vfio/pci.c > >> @@ -154,6 +154,8 @@ typedef struct VFIOPCIDevice { > >> PCIHostDeviceAddress host; > >> EventNotifier err_notifier; > >> EventNotifier req_notifier; > >> + > >> + Notifier sec_bus_reset_notifier; > >> uint32_t features; > >> #define VFIO_FEATURE_ENABLE_VGA_BIT 0 > >> #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT) > >> @@ -2627,6 +2629,13 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size) > >> return pos; > >> } > >> > >> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > >> + PCIHostDeviceAddress *host2) > >> +{ > >> + return (host1->domain == host2->domain && host1->bus == host2->bus && > >> + host1->slot == host2->slot && host1->function == host2->function); > >> +} > >> + > >> static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos) > >> { > >> uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP); > >> @@ -2819,6 +2828,131 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos) > >> return 0; > >> } > >> > >> +static int vfio_aer_validate_devices(DeviceState *dev, > >> + void *opaque) > >> +{ > >> + VFIOPCIDevice *vdev; > >> + int i; > >> + bool found = false; > >> + struct vfio_pci_hot_reset_info *info = opaque; > >> + struct vfio_pci_dependent_device *devices = &info->devices[0]; > >> + > >> + if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) { > >> + return 0; > >> + } > >> + > >> + vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(dev)); > >> + for (i = 0; i < info->count; i++) { > >> + PCIHostDeviceAddress host; > >> + > >> + host.domain = devices[i].segment; > >> + host.bus = devices[i].bus; > >> + host.slot = PCI_SLOT(devices[i].devfn); > >> + host.function = PCI_FUNC(devices[i].devfn); > >> + > >> + if (vfio_pci_host_match(&host, &vdev->host)) { > >> + found = true; > >> + break; > >> + } > >> + } > >> + > >> + if (!found) { > >> + error_report("vfio: Cannot reset parent bus with AER supported," > >> + "depends on device %s which is not contained.", > >> + vdev->vbasedev.name); > >> + return -1; > >> + } > >> + > >> + return 0; > >> +} > >> + > >> +static void vfio_pci_vm_stop(VFIOPCIDevice *vdev) > >> +{ > >> + error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. " > >> + "Please collect any data possible and then kill the guest", > >> + __func__, vdev->host.domain, vdev->host.bus, > >> + vdev->host.slot, vdev->host.function); > >> + > >> + vm_stop(RUN_STATE_INTERNAL_ERROR); > >> +} > >> + > >> +static void vfio_pci_host_bus_reset(Notifier *n, void *opaque) > >> +{ > >> + VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, sec_bus_reset_notifier); > >> + PCIDevice *pdev = &vdev->pdev; > >> + int ret, i; > >> + struct vfio_pci_hot_reset_info *info; > >> + struct vfio_pci_dependent_device *devices; > >> + VFIOGroup *group; > >> + > >> + if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) { > >> + return; > >> + } > >> + > >> + /* > >> + * Check the affected devices by virtual bus reset are contained in > >> + * the set of groups. > >> + */ > >> + ret = vfio_get_hot_reset_info(vdev, &info); > >> + if (ret < 0) { > >> + goto stop_vm; > >> + } > >> + > >> + devices = &info->devices[0]; > >> + > >> + /* Verify that we have all the groups required */ > >> + for (i = 0; i < info->count; i++) { > >> + PCIHostDeviceAddress host; > >> + > >> + host.domain = devices[i].segment; > >> + host.bus = devices[i].bus; > >> + host.slot = PCI_SLOT(devices[i].devfn); > >> + host.function = PCI_FUNC(devices[i].devfn); > >> + > >> + if (vfio_pci_host_match(&host, &vdev->host)) { > >> + continue; > >> + } > >> + > >> + QLIST_FOREACH(group, &vfio_group_list, next) { > >> + if (group->groupid == devices[i].group_id) { > >> + break; > >> + } > >> + } > >> + > >> + if (!group) { > >> + if (!vdev->has_pm_reset) { > > I'm not sure how has_pm_reset has anything to do with what we're testing > > here. Copy-paste error? > > > >> + error_report("vfio: Cannot reset device %s with AER supported," > >> + "depends on group %d which is not owned.", > >> + vdev->vbasedev.name, devices[i].group_id); > >> + } > >> + ret = -EPERM; > >> + goto stop_vm; > >> + } > >> + } > > > > The above verifies that all of the devices affected by the bus reset are > > owned by the VM. > > > >> + > >> + /* Verify that we have all the affected devices under the bus */ > >> + ret = qbus_walk_children(BUS(pdev->bus), NULL, NULL, > >> + vfio_aer_validate_devices, > >> + NULL, info); > > And here we make sure that any vfio-pci devices affected by the bus > > reset are contained in the list of affected devices. What we're still > > missing is whether there are any affected devices that are exposed to > > the VM that are not covered in this bus walk. For instance, if I assign > > a multi-function device and place function 0 and 1 under separate, peer > > root ports, I believe the above tests will confirm that the VM owns the > > necessary groups and that the only vfio-pci device affected by the bus > > reset is in the list of affected devices, but it will not verify that > > the other function is not affected by the guest bus reset, but is > > affected by the host bus reset. > you are right. > > > > >> + if (ret < 0) { > >> + goto stop_vm; > >> + } > >> + > >> + > >> + /* bus reset! */ > >> + ret = vfio_pci_do_hot_reset(vdev, info); > >> + if (ret < 0) { > >> + goto stop_vm; > >> + } > >> + > >> + g_free(info); > >> + return; > >> + > >> +stop_vm: > >> + g_free(info); > >> + vfio_pci_vm_stop(vdev); > >> +} > >> + > >> static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size) > >> { > >> PCIDevice *pdev = &vdev->pdev; > >> @@ -2852,6 +2986,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size) > >> pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4); > >> pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity); > >> > >> + vdev->sec_bus_reset_notifier.notify = vfio_pci_host_bus_reset; > >> + pci_bus_add_reset_notifier(pdev->bus, &vdev->sec_bus_reset_notifier); > >> + > > Additionally, we're going to do a bus reset once for every vfio-pci > > device subordinate to the bus being reset. That's not very efficient. > > > > There are still a number of problems here. We're allowing users to > > configure their VMs however they wish and enable AER, then only when > > they do a bus reset (which we have no way to infer is related to an AER > > event) do we check and potentially halt the VM if guest mapping of the > > host topology prevents a bus reset. That seems sort of like making > > backups of your data, but never testing that you can recover from the > > backup until we need the data. I can't imagine that many users are > > going to be able to get this correct and they won't know it's incorrect > > until an AER event occurs. > > > > Also, Do we need to worry about guest root complex devices? The guest > > won't be able to perform a bus reset on the guest bus in that case. Is > > AER even valid for RC devices since they don't have a parent downstream > > port to signal the AER? This seems like another case where it's more > > likely that a user will create an invalid configuration than it is they > > will create a valid one, but we won't know until an AER occurs with the > > code structure here. > do you refer to the root complex device is the root port of root complex? > the root port can notify system itself, the PCI Express aer driver > reset the upstream link between root port and upstream port of > subordinate switch. so I think we don't need to care that. No, I'm saying that we can place an assigned device directly on pcie.0, the host bus, without using a root port. I don't know if we have any way to signal an AER event for root complex devices, but the guest certainly doesn't have a way to perform a bus reset on the host bus. Thanks, Alex > > Therefore I think that if the user specifies AER, we need to verify and > > enforce at that point, ie. the initfn, that a host bus reset is > > possible, that a guest reset is possible, and that a guest bus reset > > fully contains the VM visible host devices affected by the reset. > > > > I'd also like to see the patches ordered such that we provide all the > > infrastructure to validate and enforce the configuration restrictions to > > support AER before we enable it to the guest. The order here creates > > bisect points where AER is exposed to the guest with unacceptable QEMU > > handling. Thanks, > I'm sorry for that, due to this is RFC version I missed that, > and I will arrange the patches after this series. > > Thanks, > Chen > > > > > > Alex > > > >> return 0; > >> } > >> > >> @@ -2978,13 +3115,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev) > >> vfio_enable_intx(vdev); > >> } > >> > >> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1, > >> - PCIHostDeviceAddress *host2) > >> -{ > >> - return (host1->domain == host2->domain && host1->bus == host2->bus && > >> - host1->slot == host2->slot && host1->function == host2->function); > >> -} > >> - > >> static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single) > >> { > >> VFIOGroup *group; > >> @@ -3328,12 +3458,7 @@ static void vfio_err_notifier_handler(void *opaque) > >> * terminate the guest to contain the error. > >> */ > >> > >> - error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. " > >> - "Please collect any data possible and then kill the guest", > >> - __func__, vdev->host.domain, vdev->host.bus, > >> - vdev->host.slot, vdev->host.function); > >> - > >> - vm_stop(RUN_STATE_INTERNAL_ERROR); > >> + vfio_pci_vm_stop(vdev); > >> } > >> > >> /* > > > > > > . > > >