All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alex Williamson <alex.williamson@redhat.com>
To: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Cc: izumi.taku@jp.fujitsu.com, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host bus reset
Date: Wed, 29 Apr 2015 11:32:30 -0600	[thread overview]
Message-ID: <1430328750.4472.215.camel@redhat.com> (raw)
In-Reply-To: <b95cd227a99fdc382e0a9a64b1e9ded18c82ad3d.1430297161.git.chen.fan.fnst@cn.fujitsu.com>

On Wed, 2015-04-29 at 16:48 +0800, Chen Fan wrote:
> add host secondary bus reset for vfio when AER occurs, if reset failed,
> we should stop vm.
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 138 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 060fb47..619daed 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -154,6 +154,8 @@ typedef struct VFIOPCIDevice {
>      PCIHostDeviceAddress host;
>      EventNotifier err_notifier;
>      EventNotifier req_notifier;
> +
> +    Notifier sec_bus_reset_notifier;
>      uint32_t features;
>  #define VFIO_FEATURE_ENABLE_VGA_BIT 0
>  #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
> @@ -2627,6 +2629,13 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size)
>      return pos;
>  }
>  
> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> +                                PCIHostDeviceAddress *host2)
> +{
> +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> +            host1->slot == host2->slot && host1->function == host2->function);
> +}
> +
>  static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos)
>  {
>      uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP);
> @@ -2819,6 +2828,131 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>      return 0;
>  }
>  
> +static int vfio_aer_validate_devices(DeviceState *dev,
> +                                     void *opaque)
> +{
> +    VFIOPCIDevice *vdev;
> +    int i;
> +    bool found = false;
> +    struct vfio_pci_hot_reset_info *info = opaque;
> +    struct vfio_pci_dependent_device *devices = &info->devices[0];
> +
> +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> +        return 0;
> +    }
> +
> +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(dev));
> +    for (i = 0; i < info->count; i++) {
> +        PCIHostDeviceAddress host;
> +
> +        host.domain = devices[i].segment;
> +        host.bus = devices[i].bus;
> +        host.slot = PCI_SLOT(devices[i].devfn);
> +        host.function = PCI_FUNC(devices[i].devfn);
> +
> +        if (vfio_pci_host_match(&host, &vdev->host)) {
> +            found = true;
> +            break;
> +        }
> +    }
> +
> +    if (!found) {
> +        error_report("vfio: Cannot reset parent bus with AER supported,"
> +                     "depends on device %s which is not contained.",
> +                      vdev->vbasedev.name);
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static void vfio_pci_vm_stop(VFIOPCIDevice *vdev)
> +{
> +    error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
> +                 "Please collect any data possible and then kill the guest",
> +                 __func__, vdev->host.domain, vdev->host.bus,
> +                 vdev->host.slot, vdev->host.function);
> +
> +    vm_stop(RUN_STATE_INTERNAL_ERROR);
> +}
> +
> +static void vfio_pci_host_bus_reset(Notifier *n, void *opaque)
> +{
> +    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, sec_bus_reset_notifier);
> +    PCIDevice *pdev = &vdev->pdev;
> +    int ret, i;
> +    struct vfio_pci_hot_reset_info *info;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +
> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> +        return;
> +    }
> +
> +    /*
> +     * Check the affected devices by virtual bus reset are contained in
> +     * the set of groups.
> +     */
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret < 0) {
> +        goto stop_vm;
> +    }
> +
> +    devices = &info->devices[0];
> +
> +    /* Verify that we have all the groups required */
> +    for (i = 0; i < info->count; i++) {
> +        PCIHostDeviceAddress host;
> +
> +        host.domain = devices[i].segment;
> +        host.bus = devices[i].bus;
> +        host.slot = PCI_SLOT(devices[i].devfn);
> +        host.function = PCI_FUNC(devices[i].devfn);
> +
> +        if (vfio_pci_host_match(&host, &vdev->host)) {
> +            continue;
> +        }
> +
> +        QLIST_FOREACH(group, &vfio_group_list, next) {
> +            if (group->groupid == devices[i].group_id) {
> +                break;
> +            }
> +        }
> +
> +        if (!group) {
> +            if (!vdev->has_pm_reset) {

I'm not sure how has_pm_reset has anything to do with what we're testing
here.  Copy-paste error?

> +                error_report("vfio: Cannot reset device %s with AER supported,"
> +                             "depends on group %d which is not owned.",
> +                             vdev->vbasedev.name, devices[i].group_id);
> +            }
> +            ret = -EPERM;
> +            goto stop_vm;
> +        }
> +    }


The above verifies that all of the devices affected by the bus reset are
owned by the VM.

> +
> +    /* Verify that we have all the affected devices under the bus */
> +    ret = qbus_walk_children(BUS(pdev->bus), NULL, NULL,
> +                             vfio_aer_validate_devices,
> +                             NULL, info);

And here we make sure that any vfio-pci devices affected by the bus
reset are contained in the list of affected devices.  What we're still
missing is whether there are any affected devices that are exposed to
the VM that are not covered in this bus walk.  For instance, if I assign
a multi-function device and place function 0 and 1 under separate, peer
root ports, I believe the above tests will confirm that the VM owns the
necessary groups and that the only vfio-pci device affected by the bus
reset is in the list of affected devices, but it will not verify that
the other function is not affected by the guest bus reset, but is
affected by the host bus reset.

> +    if (ret < 0) {
> +        goto stop_vm;
> +    }
> +
> +
> +    /* bus reset! */
> +    ret = vfio_pci_do_hot_reset(vdev, info);
> +    if (ret < 0) {
> +        goto stop_vm;
> +    }
> +
> +    g_free(info);
> +    return;
> +
> +stop_vm:
> +    g_free(info);
> +    vfio_pci_vm_stop(vdev);
> +}
> +
>  static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
>  {
>      PCIDevice *pdev = &vdev->pdev;
> @@ -2852,6 +2986,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
>                     pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
>      pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
>  
> +    vdev->sec_bus_reset_notifier.notify = vfio_pci_host_bus_reset;
> +    pci_bus_add_reset_notifier(pdev->bus, &vdev->sec_bus_reset_notifier);
> +

Additionally, we're going to do a bus reset once for every vfio-pci
device subordinate to the bus being reset.  That's not very efficient.

There are still a number of problems here.  We're allowing users to
configure their VMs however they wish and enable AER, then only when
they do a bus reset (which we have no way to infer is related to an AER
event) do we check and potentially halt the VM if guest mapping of the
host topology prevents a bus reset.  That seems sort of like making
backups of your data, but never testing that you can recover from the
backup until we need the data.  I can't imagine that many users are
going to be able to get this correct and they won't know it's incorrect
until an AER event occurs.

Also, Do we need to worry about guest root complex devices?  The guest
won't be able to perform a bus reset on the guest bus in that case.  Is
AER even valid for RC devices since they don't have a parent downstream
port to signal the AER?  This seems like another case where it's more
likely that a user will create an invalid configuration than it is they
will create a valid one, but we won't know until an AER occurs with the
code structure here.

Therefore I think that if the user specifies AER, we need to verify and
enforce at that point, ie. the initfn, that a host bus reset is
possible, that a guest reset is possible, and that a guest bus reset
fully contains the VM visible host devices affected by the reset.

I'd also like to see the patches ordered such that we provide all the
infrastructure to validate and enforce the configuration restrictions to
support AER before we enable it to the guest.  The order here creates
bisect points where AER is exposed to the guest with unacceptable QEMU
handling.  Thanks,

Alex

>      return 0;
>  }
>  
> @@ -2978,13 +3115,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>      vfio_enable_intx(vdev);
>  }
>  
> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
> -                                PCIHostDeviceAddress *host2)
> -{
> -    return (host1->domain == host2->domain && host1->bus == host2->bus &&
> -            host1->slot == host2->slot && host1->function == host2->function);
> -}
> -
>  static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>  {
>      VFIOGroup *group;
> @@ -3328,12 +3458,7 @@ static void vfio_err_notifier_handler(void *opaque)
>       * terminate the guest to contain the error.
>       */
>  
> -    error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
> -                 "Please collect any data possible and then kill the guest",
> -                 __func__, vdev->host.domain, vdev->host.bus,
> -                 vdev->host.slot, vdev->host.function);
> -
> -    vm_stop(RUN_STATE_INTERNAL_ERROR);
> +    vfio_pci_vm_stop(vdev);
>  }
>  
>  /*

  reply	other threads:[~2015-04-29 17:32 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-29  8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 01/11] vfio: add pcie extanded capability support Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 02/11] aer: impove pcie_aer_init to support vfio device Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 03/11] vfio: add aer support for " Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 04/11] pcie_aer: expose pcie_aer_msg() interface Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 05/11] vfio-pci: pass the aer error to guest Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 06/11] vfio: add 'aer' property to expose aercap Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 07/11] pc: add HW_COMPAT_2_2 to disable aercap for vifo device Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 08/11] vfio: extract vfio_get_hot_reset_info as a single function Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 09/11] qdev: add bus reset_notifiers callbacks for host bus reset Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 10/11] vfio: squeeze out vfio_pci_do_hot_reset for support " Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host " Chen Fan
2015-04-29 17:32   ` Alex Williamson [this message]
2015-04-30  3:07     ` Chen Fan
2015-04-30  3:21       ` Alex Williamson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1430328750.4472.215.camel@redhat.com \
    --to=alex.williamson@redhat.com \
    --cc=chen.fan.fnst@cn.fujitsu.com \
    --cc=izumi.taku@jp.fujitsu.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.