All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: izumi.taku@jp.fujitsu.com, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host bus reset
Date: Thu, 30 Apr 2015 11:07:24 +0800	[thread overview]
Message-ID: <55419C6C.6020903@cn.fujitsu.com> (raw)
In-Reply-To: <1430328750.4472.215.camel@redhat.com>


On 04/30/2015 01:32 AM, Alex Williamson wrote:
> On Wed, 2015-04-29 at 16:48 +0800, Chen Fan wrote:
>> add host secondary bus reset for vfio when AER occurs, if reset failed,
>> we should stop vm.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/vfio/pci.c | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>>   1 file changed, 138 insertions(+), 13 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 060fb47..619daed 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -154,6 +154,8 @@ typedef struct VFIOPCIDevice {
>>       PCIHostDeviceAddress host;
>>       EventNotifier err_notifier;
>>       EventNotifier req_notifier;
>> +
>> +    Notifier sec_bus_reset_notifier;
>>       uint32_t features;
>>   #define VFIO_FEATURE_ENABLE_VGA_BIT 0
>>   #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
>> @@ -2627,6 +2629,13 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int pos, uint8_t size)
>>       return pos;
>>   }
>>   
>> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
>> +                                PCIHostDeviceAddress *host2)
>> +{
>> +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
>> +            host1->slot == host2->slot && host1->function == host2->function);
>> +}
>> +
>>   static void vfio_check_pcie_flr(VFIOPCIDevice *vdev, uint8_t pos)
>>   {
>>       uint32_t cap = pci_get_long(vdev->pdev.config + pos + PCI_EXP_DEVCAP);
>> @@ -2819,6 +2828,131 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>>       return 0;
>>   }
>>   
>> +static int vfio_aer_validate_devices(DeviceState *dev,
>> +                                     void *opaque)
>> +{
>> +    VFIOPCIDevice *vdev;
>> +    int i;
>> +    bool found = false;
>> +    struct vfio_pci_hot_reset_info *info = opaque;
>> +    struct vfio_pci_dependent_device *devices = &info->devices[0];
>> +
>> +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
>> +        return 0;
>> +    }
>> +
>> +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, PCI_DEVICE(dev));
>> +    for (i = 0; i < info->count; i++) {
>> +        PCIHostDeviceAddress host;
>> +
>> +        host.domain = devices[i].segment;
>> +        host.bus = devices[i].bus;
>> +        host.slot = PCI_SLOT(devices[i].devfn);
>> +        host.function = PCI_FUNC(devices[i].devfn);
>> +
>> +        if (vfio_pci_host_match(&host, &vdev->host)) {
>> +            found = true;
>> +            break;
>> +        }
>> +    }
>> +
>> +    if (!found) {
>> +        error_report("vfio: Cannot reset parent bus with AER supported,"
>> +                     "depends on device %s which is not contained.",
>> +                      vdev->vbasedev.name);
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void vfio_pci_vm_stop(VFIOPCIDevice *vdev)
>> +{
>> +    error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected. "
>> +                 "Please collect any data possible and then kill the guest",
>> +                 __func__, vdev->host.domain, vdev->host.bus,
>> +                 vdev->host.slot, vdev->host.function);
>> +
>> +    vm_stop(RUN_STATE_INTERNAL_ERROR);
>> +}
>> +
>> +static void vfio_pci_host_bus_reset(Notifier *n, void *opaque)
>> +{
>> +    VFIOPCIDevice *vdev = container_of(n, VFIOPCIDevice, sec_bus_reset_notifier);
>> +    PCIDevice *pdev = &vdev->pdev;
>> +    int ret, i;
>> +    struct vfio_pci_hot_reset_info *info;
>> +    struct vfio_pci_dependent_device *devices;
>> +    VFIOGroup *group;
>> +
>> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * Check the affected devices by virtual bus reset are contained in
>> +     * the set of groups.
>> +     */
>> +    ret = vfio_get_hot_reset_info(vdev, &info);
>> +    if (ret < 0) {
>> +        goto stop_vm;
>> +    }
>> +
>> +    devices = &info->devices[0];
>> +
>> +    /* Verify that we have all the groups required */
>> +    for (i = 0; i < info->count; i++) {
>> +        PCIHostDeviceAddress host;
>> +
>> +        host.domain = devices[i].segment;
>> +        host.bus = devices[i].bus;
>> +        host.slot = PCI_SLOT(devices[i].devfn);
>> +        host.function = PCI_FUNC(devices[i].devfn);
>> +
>> +        if (vfio_pci_host_match(&host, &vdev->host)) {
>> +            continue;
>> +        }
>> +
>> +        QLIST_FOREACH(group, &vfio_group_list, next) {
>> +            if (group->groupid == devices[i].group_id) {
>> +                break;
>> +            }
>> +        }
>> +
>> +        if (!group) {
>> +            if (!vdev->has_pm_reset) {
> I'm not sure how has_pm_reset has anything to do with what we're testing
> here.  Copy-paste error?
>
>> +                error_report("vfio: Cannot reset device %s with AER supported,"
>> +                             "depends on group %d which is not owned.",
>> +                             vdev->vbasedev.name, devices[i].group_id);
>> +            }
>> +            ret = -EPERM;
>> +            goto stop_vm;
>> +        }
>> +    }
>
> The above verifies that all of the devices affected by the bus reset are
> owned by the VM.
>
>> +
>> +    /* Verify that we have all the affected devices under the bus */
>> +    ret = qbus_walk_children(BUS(pdev->bus), NULL, NULL,
>> +                             vfio_aer_validate_devices,
>> +                             NULL, info);
> And here we make sure that any vfio-pci devices affected by the bus
> reset are contained in the list of affected devices.  What we're still
> missing is whether there are any affected devices that are exposed to
> the VM that are not covered in this bus walk.  For instance, if I assign
> a multi-function device and place function 0 and 1 under separate, peer
> root ports, I believe the above tests will confirm that the VM owns the
> necessary groups and that the only vfio-pci device affected by the bus
> reset is in the list of affected devices, but it will not verify that
> the other function is not affected by the guest bus reset, but is
> affected by the host bus reset.
you are right.

>
>> +    if (ret < 0) {
>> +        goto stop_vm;
>> +    }
>> +
>> +
>> +    /* bus reset! */
>> +    ret = vfio_pci_do_hot_reset(vdev, info);
>> +    if (ret < 0) {
>> +        goto stop_vm;
>> +    }
>> +
>> +    g_free(info);
>> +    return;
>> +
>> +stop_vm:
>> +    g_free(info);
>> +    vfio_pci_vm_stop(vdev);
>> +}
>> +
>>   static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
>>   {
>>       PCIDevice *pdev = &vdev->pdev;
>> @@ -2852,6 +2986,9 @@ static int vfio_setup_aer(VFIOPCIDevice *vdev, int pos, uint16_t size)
>>                      pdev->exp.aer_cap + PCI_ERR_UNCOR_SEVER, 4);
>>       pci_long_test_and_clear_mask(exp_cap + PCI_ERR_UNCOR_SEVER, ~severity);
>>   
>> +    vdev->sec_bus_reset_notifier.notify = vfio_pci_host_bus_reset;
>> +    pci_bus_add_reset_notifier(pdev->bus, &vdev->sec_bus_reset_notifier);
>> +
> Additionally, we're going to do a bus reset once for every vfio-pci
> device subordinate to the bus being reset.  That's not very efficient.
>
> There are still a number of problems here.  We're allowing users to
> configure their VMs however they wish and enable AER, then only when
> they do a bus reset (which we have no way to infer is related to an AER
> event) do we check and potentially halt the VM if guest mapping of the
> host topology prevents a bus reset.  That seems sort of like making
> backups of your data, but never testing that you can recover from the
> backup until we need the data.  I can't imagine that many users are
> going to be able to get this correct and they won't know it's incorrect
> until an AER event occurs.
>
> Also, Do we need to worry about guest root complex devices?  The guest
> won't be able to perform a bus reset on the guest bus in that case.  Is
> AER even valid for RC devices since they don't have a parent downstream
> port to signal the AER?  This seems like another case where it's more
> likely that a user will create an invalid configuration than it is they
> will create a valid one, but we won't know until an AER occurs with the
> code structure here.
do you refer to the root complex device is the root port of root complex?
the root port can notify system itself, the PCI Express aer driver
reset the upstream link between root port and upstream port of
subordinate switch. so I think we don't need to care that.

>
> Therefore I think that if the user specifies AER, we need to verify and
> enforce at that point, ie. the initfn, that a host bus reset is
> possible, that a guest reset is possible, and that a guest bus reset
> fully contains the VM visible host devices affected by the reset.
>
> I'd also like to see the patches ordered such that we provide all the
> infrastructure to validate and enforce the configuration restrictions to
> support AER before we enable it to the guest.  The order here creates
> bisect points where AER is exposed to the guest with unacceptable QEMU
> handling.  Thanks,
I'm sorry for that, due to this is RFC version I missed that,
and  I will arrange the patches after this series.

Thanks,
Chen


>
> Alex
>
>>       return 0;
>>   }
>>   
>> @@ -2978,13 +3115,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>>       vfio_enable_intx(vdev);
>>   }
>>   
>> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
>> -                                PCIHostDeviceAddress *host2)
>> -{
>> -    return (host1->domain == host2->domain && host1->bus == host2->bus &&
>> -            host1->slot == host2->slot && host1->function == host2->function);
>> -}
>> -
>>   static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>>   {
>>       VFIOGroup *group;
>> @@ -3328,12 +3458,7 @@ static void vfio_err_notifier_handler(void *opaque)
>>        * terminate the guest to contain the error.
>>        */
>>   
>> -    error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
>> -                 "Please collect any data possible and then kill the guest",
>> -                 __func__, vdev->host.domain, vdev->host.bus,
>> -                 vdev->host.slot, vdev->host.function);
>> -
>> -    vm_stop(RUN_STATE_INTERNAL_ERROR);
>> +    vfio_pci_vm_stop(vdev);
>>   }
>>   
>>   /*
>
>
> .
>

  reply	other threads:[~2015-04-30  3:28 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-29  8:48 [Qemu-devel] [PATCH RFC v6 00/11] vfio-pci: pass the aer error to guest Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 01/11] vfio: add pcie extanded capability support Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 02/11] aer: impove pcie_aer_init to support vfio device Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 03/11] vfio: add aer support for " Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 04/11] pcie_aer: expose pcie_aer_msg() interface Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 05/11] vfio-pci: pass the aer error to guest Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 06/11] vfio: add 'aer' property to expose aercap Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 07/11] pc: add HW_COMPAT_2_2 to disable aercap for vifo device Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 08/11] vfio: extract vfio_get_hot_reset_info as a single function Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 09/11] qdev: add bus reset_notifiers callbacks for host bus reset Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 10/11] vfio: squeeze out vfio_pci_do_hot_reset for support " Chen Fan
2015-04-29  8:48 ` [Qemu-devel] [PATCH RFC v6 11/11] vfio: add bus reset notifier for host " Chen Fan
2015-04-29 17:32   ` Alex Williamson
2015-04-30  3:07     ` Chen Fan [this message]
2015-04-30  3:21       ` Alex Williamson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55419C6C.6020903@cn.fujitsu.com \
    --to=chen.fan.fnst@cn.fujitsu.com \
    --cc=alex.williamson@redhat.com \
    --cc=izumi.taku@jp.fujitsu.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.