qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
To: Alex Williamson <alex.williamson@redhat.com>,
	Cao jin <caoj.fnst@cn.fujitsu.com>
Cc: izumi.taku@jp.fujitsu.com, qemu-devel@nongnu.org, mst@redhat.com
Subject: Re: [Qemu-devel] [PATCH v2 05/11] vfio: add check host bus reset is support or not
Date: Wed, 9 Mar 2016 09:26:22 +0800	[thread overview]
Message-ID: <56DF7BBE.7000408@cn.fujitsu.com> (raw)
In-Reply-To: <20160308155519.1272a09f@t450s.home>


On 03/09/2016 06:55 AM, Alex Williamson wrote:
> On Mon, 7 Mar 2016 11:22:58 +0800
> Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
>
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> When booting a VM that has vfio devices assigned with AER enabled, we
>> must check whether each such vfio device supports host bus reset,
>> because when an error occurs the OS driver always recovers the device
>> by doing a bus reset. In order to recover the vfio device, QEMU must
>> do a host bus reset to return the device to its default state. We
>> must also check that all devices affected by the bus reset are
>> assigned to the VM.
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/vfio/pci.c | 218 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>>   hw/vfio/pci.h |   1 +
>>   2 files changed, 212 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 8ec9b25..0898e34 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1868,6 +1868,197 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
>>       return 0;
>>   }
>>   
>> +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
>> +                                     PCIHostDeviceAddress *host2)
>> +{
>> +    return (host1->domain == host2->domain && host1->bus == host2->bus &&
>> +            host1->slot == host2->slot);
>> +}
>> +
>> +static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
>> +                                PCIHostDeviceAddress *host2)
>> +{
>> +    return (vfio_pci_host_slot_match(host1, host2) &&
>> +            host1->function == host2->function);
>> +}
>> +
>> +struct VFIODeviceFind {
>> +    PCIDevice *pdev;
>> +    bool found;
>> +};
>> +
>> +static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
>> +                                      void *opaque)
>> +{
>> +    DeviceState *dev = DEVICE(pdev);
>> +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
>> +    VFIOPCIDevice *vdev;
>> +    struct VFIODeviceFind *find = opaque;
>> +
>> +    if (find->found) {
>> +        return;
>> +    }
>> +
>> +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
>> +        if (!dc->reset) {
>> +            goto found;
>> +        }
>> +        return;
>> +    }
>> +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
>> +        !vdev->vbasedev.reset_works) {
>> +        goto found;
>> +    }
>> +
>> +    return;
>> +found:
>> +    find->pdev = pdev;
>> +    find->found = true;
>> +}
>> +
>> +static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>> +{
>> +    PCIBus *bus = vdev->pdev.bus;
>> +    struct vfio_pci_hot_reset_info *info = NULL;
>> +    struct vfio_pci_dependent_device *devices;
>> +    VFIOGroup *group;
>> +    struct VFIODeviceFind find;
>> +    int ret, i;
>> +
>> +    ret = vfio_get_hot_reset_info(vdev, &info);
>> +    if (ret) {
>> +        error_setg(errp, "vfio: Cannot enable AER for device %s,"
>> +                   " device does not support hot reset.",
>> +                   vdev->vbasedev.name);
>> +        return;
>> +    }
>> +
>> +    /* List all affected devices by bus reset */
>> +    devices = &info->devices[0];
>> +
>> +    /* Verify that we have all the groups required */
>> +    for (i = 0; i < info->count; i++) {
>> +        PCIHostDeviceAddress host;
>> +        VFIOPCIDevice *tmp;
>> +        VFIODevice *vbasedev_iter;
>> +        bool found = false;
>> +
>> +        host.domain = devices[i].segment;
>> +        host.bus = devices[i].bus;
>> +        host.slot = PCI_SLOT(devices[i].devfn);
>> +        host.function = PCI_FUNC(devices[i].devfn);
>> +
>> +        /* Skip the current device */
>> +        if (vfio_pci_host_match(&host, &vdev->host)) {
>> +            continue;
>> +        }
>> +
>> +        /* Ensure we own the group of the affected device */
>> +        QLIST_FOREACH(group, &vfio_group_list, next) {
>> +            if (group->groupid == devices[i].group_id) {
>> +                break;
>> +            }
>> +        }
>> +
>> +        if (!group) {
>> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
>> +                       "depends on group %d which is not owned.",
>> +                       vdev->vbasedev.name, devices[i].group_id);
>> +            goto out;
>> +        }
>> +
>> +        /* Ensure affected devices for reset on the same slot */
>> +        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> +            if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
>> +                continue;
>> +            }
>> +            tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
>> +            if (vfio_pci_host_match(&host, &tmp->host)) {
>> +                /*
>> +                 * AER errors may be broadcast to all functions of a multi-
>> +                 * function endpoint.  If any of those sibling functions are
>> +                 * also assigned, they need to have AER enabled or else an
>> +                 * error may continue to cause a vm_stop condition.  IOW,
>> +                 * AER setup of this function would be pointless.
>> +                 */
>> +                if (vfio_pci_host_slot_match(&vdev->host, &tmp->host) &&
>> +                    !(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
>> +                    error_setg(errp, "vfio: Cannot enable AER for device %s, on same slot"
>> +                               " the dependent device %s which does not enable AER.",
>> +                               vdev->vbasedev.name, tmp->vbasedev.name);
>> +                    goto out;
>> +                }
>> +
>> +                if (tmp->pdev.bus != bus) {
>> +                    error_setg(errp, "vfio: Cannot enable AER for device %s, "
>> +                               "the dependent device %s is not on the same bus",
>> +                               vdev->vbasedev.name, tmp->vbasedev.name);
>> +                    goto out;
>> +                }
>> +                found = true;
>> +                break;
>> +            }
>> +        }
>> +
>> +        /* Ensure all affected devices assigned to VM */
>> +        if (!found) {
>> +            error_setg(errp, "vfio: Cannot enable AER for device %s, "
>> +                       "the dependent device %04x:%02x:%02x.%x "
>> +                       "is not assigned to VM.",
>> +                       vdev->vbasedev.name, host.domain, host.bus,
>> +                       host.slot, host.function);
>> +            goto out;
>> +        }
>> +    }
>> +
>> +    /*
>> +     * Check that all PCI devices on or below the target bus
>> +     * have at least a reset mechanism.
>> +     */
>> +    find.pdev = NULL;
>> +    find.found = false;
>> +    pci_for_each_device(bus, pci_bus_num(bus),
> I'm not fully convinced this does what it says that it does.  Bus
> numbers are under guest control, but using pci_bus_num(bus) here will
> cause us to always take the path in pci_find_bus_nr() where it simply
> returns bus.  Thus we call pci_for_each_device_under_bus() with the bus
> originally provided in pci_for_each_device().  But
> pci_for_each_device_under_bus() only iterates the devfns immediately on
> that bus, despite the name that would imply otherwise.  So it seems
> like the pci_for_each_device() callback would need to itself call
> pci_for_each_device() if it finds a bridge.  Am I missing something or
> is that correct?
>
> We could also just go on to require that there are no subordinate buses
> to this bus, which seems like a reasonable thing to do.  Thanks,
Yes, requiring that there are no subordinate buses should be enough for us.

Thanks,
Chen

>
> Alex
>
>> +                        vfio_check_device_noreset, &find);
>> +    if (find.found) {
>> +        error_setg(errp, "vfio: Cannot enable AER for device %s, "
>> +                   "the affected device %s does not have a reset mechanism.",
>> +                   vdev->vbasedev.name, find.pdev->name);
>> +        goto out;
>> +    }
>> +
>> +out:
>> +    g_free(info);
>> +    return;
>> +}
>> +
>> +static void vfio_check_devices_host_bus_reset(Error **errp)
>> +{
>> +    VFIOGroup *group;
>> +    VFIODevice *vbasedev;
>> +    VFIOPCIDevice *vdev;
>> +    Error *local_err = NULL;
>> +
>> +    /* Check that all AER-enabled vfio-pci devices support host bus reset */
>> +    QLIST_FOREACH(group, &vfio_group_list, next) {
>> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
>> +            if (vbasedev->type != VFIO_DEVICE_TYPE_PCI) {
>> +                continue;
>> +            }
>> +            vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev);
>> +            if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
>> +                vfio_check_host_bus_reset(vdev, &local_err);
>> +                if (local_err) {
>> +                    error_propagate(errp, local_err);
>> +                    return;
>> +                }
>> +            }
>> +        }
>> +    }
>> +
>> +    return;
>> +}
>> +
>>   static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
>>                             int pos, uint16_t size)
>>   {
>> @@ -2047,13 +2238,6 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
>>       vfio_intx_enable(vdev);
>>   }
>>   
>> -static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
>> -                                PCIHostDeviceAddress *host2)
>> -{
>> -    return (host1->domain == host2->domain && host1->bus == host2->bus &&
>> -            host1->slot == host2->slot && host1->function == host2->function);
>> -}
>> -
>>   static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
>>   {
>>       VFIOGroup *group;
>> @@ -2559,6 +2743,21 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
>>       vdev->req_enabled = false;
>>   }
>>   
>> +static void vfio_pci_machine_done_notify(Notifier *notifier, void *unused)
>> +{
>> +    Error *local_err = NULL;
>> +
>> +    vfio_check_devices_host_bus_reset(&local_err);
>> +    if (local_err) {
>> +        error_report_err(local_err);
>> +        exit(1);
>> +    }
>> +}
>> +
>> +static Notifier machine_notifier = {
>> +    .notify = vfio_pci_machine_done_notify,
>> +};
>> +
>>   static int vfio_initfn(PCIDevice *pdev)
>>   {
>>       VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
>> @@ -2905,6 +3104,11 @@ static const TypeInfo vfio_pci_dev_info = {
>>   static void register_vfio_pci_dev_type(void)
>>   {
>>       type_register_static(&vfio_pci_dev_info);
>> +    /*
>> +     * Register a notifier for when machine init is done, since we need to
>> +     * check the configuration after all vfio devices are initialized.
>> +     */
>> +    qemu_add_machine_init_done_notifier(&machine_notifier);
>>   }
>>   
>>   type_init(register_vfio_pci_dev_type)
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index e0c53f2..aff46c2 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -15,6 +15,7 @@
>>   #include "qemu-common.h"
>>   #include "exec/memory.h"
>>   #include "hw/pci/pci.h"
>> +#include "hw/pci/pci_bus.h"
>>   #include "hw/pci/pci_bridge.h"
>>   #include "hw/vfio/vfio-common.h"
>>   #include "qemu/event_notifier.h"
>
>
> .
>

  reply	other threads:[~2016-03-09  1:32 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-07  3:22 [Qemu-devel] [PATCH v2 Resend 00/11] vfio-pci: pass the aer error to guest, part2 Cao jin
2016-03-07  3:22 ` [Qemu-devel] [PATCH v2 01/11] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
2016-03-07  3:22 ` [Qemu-devel] [PATCH v2 02/11] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
2016-03-07  3:22 ` [Qemu-devel] [PATCH v2 03/11] vfio: add pcie extended capability support Cao jin
2016-03-07  3:22 ` [Qemu-devel] [PATCH v2 04/11] vfio: add aer support for vfio device Cao jin
2016-03-08 22:55   ` Alex Williamson
2016-03-09  1:21     ` Chen Fan
2016-03-07  3:22 ` [Qemu-devel] [PATCH v2 05/11] vfio: add check host bus reset is support or not Cao jin
2016-03-08 22:55   ` Alex Williamson
2016-03-09  1:26     ` Chen Fan [this message]
2016-03-09 16:47   ` Michael S. Tsirkin
2016-03-09 16:59     ` Alex Williamson
2016-03-09 17:21       ` Michael S. Tsirkin
2016-03-07  3:22 ` [Qemu-devel] [PATCH v2 06/11] pci: add a is_valid_func callback to check device if complete Cao jin
2016-03-09 16:22   ` Michael S. Tsirkin
2016-03-09 16:50     ` Alex Williamson
2016-03-09 17:14       ` Michael S. Tsirkin
2016-03-10  2:00         ` Chen Fan
2016-03-07  3:23 ` [Qemu-devel] [PATCH v2 07/11] vfio: add check aer functionality for hotplug device Cao jin
2016-03-07  3:23 ` [Qemu-devel] [PATCH v2 08/11] pci: introduce pci bus pre reset Cao jin
2016-03-07  3:23 ` [Qemu-devel] [PATCH v2 09/11] vfio: vote a device to do host bus reset Cao jin
2016-03-09 10:07   ` Michael S. Tsirkin
2016-03-09 16:37   ` Alex Williamson
2016-03-10  6:15     ` Chen Fan
2016-03-10 14:16       ` Michael S. Tsirkin
2016-03-09 16:39   ` Michael S. Tsirkin
2016-03-09 17:09     ` Alex Williamson
2016-03-09 17:31       ` Michael S. Tsirkin
2016-03-07  3:23 ` [Qemu-devel] [PATCH v2 10/11] vfio-pci: pass the aer error to guest Cao jin
2016-03-07  3:23 ` [Qemu-devel] [PATCH v2 11/11] vfio: add 'aer' property to expose aercap Cao jin
  -- strict thread matches above, loose matches on Subject: below --
2016-02-19 10:42 [Qemu-devel] [PATCH v2 00/11] vfio-pci: pass the aer error to guest, part2 Cao jin
2016-02-19 10:42 ` [Qemu-devel] [PATCH v2 05/11] vfio: add check host bus reset is support or not Cao jin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56DF7BBE.7000408@cn.fujitsu.com \
    --to=chen.fan.fnst@cn.fujitsu.com \
    --cc=alex.williamson@redhat.com \
    --cc=caoj.fnst@cn.fujitsu.com \
    --cc=izumi.taku@jp.fujitsu.com \
    --cc=mst@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).