qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: chen.fan.fnst@cn.fujitsu.com, Cao jin <caoj.fnst@cn.fujitsu.com>,
	qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH v14 Resend 08/13] vfio: add check host bus reset is support or not
Date: Thu, 24 Dec 2015 21:42:20 +0200	[thread overview]
Message-ID: <20151224213836-mutt-send-email-mst@redhat.com> (raw)
In-Reply-To: <1450982475.2950.116.camel@redhat.com>

On Thu, Dec 24, 2015 at 11:41:15AM -0700, Alex Williamson wrote:
> On Thu, 2015-12-24 at 20:23 +0200, Michael S. Tsirkin wrote:
> > On Thu, Dec 24, 2015 at 11:20:26AM -0700, Alex Williamson wrote:
> > > On Thu, 2015-12-24 at 20:06 +0200, Michael S. Tsirkin wrote:
> > > > On Thu, Dec 24, 2015 at 10:47:06AM -0700, Alex Williamson wrote:
> > > > > On Thu, 2015-12-24 at 16:32 +0200, Michael S. Tsirkin wrote:
> > > > > > On Thu, Dec 17, 2015 at 09:41:49AM +0800, Cao jin wrote:
> > > > > > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > 
> > > > > > > when init vfio devices done, we should test all the devices
> > > > > > > supported
> > > > > > > aer whether conflict with others. For each one, get the hot
> > > > > > > reset
> > > > > > > info for the affected device list.  For each affected
> > > > > > > device,
> > > > > > > all
> > > > > > > should attach to the VM and on/below the same bus. also, we
> > > > > > > should
> > > > > > > test
> > > > > > > all of the non-AER supporting vfio-pci devices on or below
> > > > > > > the
> > > > > > > target
> > > > > > > bus to verify they have a reset mechanism.
> > > > > > > 
> > > > > > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > > > > > ---
> > > > > > >  hw/vfio/pci.c | 236
> > > > > > > ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > > > > > >  hw/vfio/pci.h |   1 +
> > > > > > >  2 files changed, 230 insertions(+), 7 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > > > > > index d00b0e4..6926dcc 100644
> > > > > > > --- a/hw/vfio/pci.c
> > > > > > > +++ b/hw/vfio/pci.c
> > > > > > > @@ -1806,6 +1806,216 @@ static int
> > > > > > > vfio_add_std_cap(VFIOPCIDevice
> > > > > > > *vdev, uint8_t pos)
> > > > > > >      return 0;
> > > > > > >  }
> > > > > > >  
> > > > > > > +static bool vfio_pci_host_slot_match(PCIHostDeviceAddress
> > > > > > > *host1,
> > > > > > > +                                     PCIHostDeviceAddress
> > > > > > > *host2)
> > > > > > > +{
> > > > > > > +    return (host1->domain == host2->domain && host1->bus
> > > > > > > ==
> > > > > > > host2-
> > > > > > > > bus &&
> > > > > > > +            host1->slot == host2->slot);
> > > > > > > +}
> > > > > > > +
> > > > > > > +static bool vfio_pci_host_match(PCIHostDeviceAddress
> > > > > > > *host1,
> > > > > > > +                                PCIHostDeviceAddress
> > > > > > > *host2)
> > > > > > > +{
> > > > > > > +    return (vfio_pci_host_slot_match(host1, host2) &&
> > > > > > > +            host1->function == host2->function);
> > > > > > > +}
> > > > > > > +
> > > > > > > +struct VFIODeviceFind {
> > > > > > > +    PCIDevice *pdev;
> > > > > > > +    bool found;
> > > > > > > +};
> > > > > > > +
> > > > > > > +static void vfio_check_device_noreset(PCIBus *bus,
> > > > > > > PCIDevice
> > > > > > > *pdev,
> > > > > > > +                                      void *opaque)
> > > > > > > +{
> > > > > > > +    DeviceState *dev = DEVICE(pdev);
> > > > > > > +    DeviceClass *dc = DEVICE_GET_CLASS(dev);
> > > > > > > +    VFIOPCIDevice *vdev;
> > > > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > > > +
> > > > > > > +    if (find->found) {
> > > > > > > +        return;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> > > > > > > +        if (!dc->reset) {
> > > > > > > +            goto found;
> > > > > > > +        }
> > > > > > > +        return;
> > > > > > > +    }
> > > > > > > +    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
> > > > > > > +    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
> > > > > > > +        !vdev->vbasedev.reset_works) {
> > > > > > > +        goto found;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    return;
> > > > > > > +found:
> > > > > > > +    find->pdev = pdev;
> > > > > > > +    find->found = true;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void device_find(PCIBus *bus, PCIDevice *pdev, void
> > > > > > > *opaque)
> > > > > > > +{
> > > > > > > +    struct VFIODeviceFind *find = opaque;
> > > > > > > +
> > > > > > > +    if (find->found) {
> > > > > > > +        return;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    if (pdev == find->pdev) {
> > > > > > > +        find->found = true;
> > > > > > > +    }
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int vfio_check_host_bus_reset(VFIOPCIDevice *vdev)
> > > > > > > +{
> > > > > > > +    PCIBus *bus = vdev->pdev.bus;
> > > > > > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > > > > > +    struct vfio_pci_dependent_device *devices;
> > > > > > > +    VFIOGroup *group;
> > > > > > > +    struct VFIODeviceFind find;
> > > > > > > +    int ret, i;
> > > > > > > +
> > > > > > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > > > > > +    if (ret) {
> > > > > > > +        error_report("vfio: Cannot enable AER for device
> > > > > > > %s,"
> > > > > > > +                     " device does not support hot
> > > > > > > reset.",
> > > > > > > +                     vdev->vbasedev.name);
> > > > > > > +        goto out;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    /* List all affected devices by bus reset */
> > > > > > > +    devices = &info->devices[0];
> > > > > > > +
> > > > > > > +    /* Verify that we have all the groups required */
> > > > > > > +    for (i = 0; i < info->count; i++) {
> > > > > > > +        PCIHostDeviceAddress host;
> > > > > > > +        VFIOPCIDevice *tmp;
> > > > > > > +        VFIODevice *vbasedev_iter;
> > > > > > > +        bool found = false;
> > > > > > > +
> > > > > > > +        host.domain = devices[i].segment;
> > > > > > > +        host.bus = devices[i].bus;
> > > > > > > +        host.slot = PCI_SLOT(devices[i].devfn);
> > > > > > > +        host.function = PCI_FUNC(devices[i].devfn);
> > > > > > > +
> > > > > > > +        /* Skip the current device */
> > > > > > > +        if (vfio_pci_host_match(&host, &vdev->host)) {
> > > > > > > +            continue;
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        /* Ensure we own the group of the affected device
> > > > > > > */
> > > > > > > +        QLIST_FOREACH(group, &vfio_group_list, next) {
> > > > > > > +            if (group->groupid == devices[i].group_id) {
> > > > > > > +                break;
> > > > > > > +            }
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        if (!group) {
> > > > > > > +            error_report("vfio: Cannot enable AER for
> > > > > > > device
> > > > > > > %s, "
> > > > > > > +                         "depends on group %d which is not
> > > > > > > owned.",
> > > > > > > +                         vdev->vbasedev.name,
> > > > > > > devices[i].group_id);
> > > > > > > +            ret = -1;
> > > > > > > +            goto out;
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        /* Ensure affected devices for reset on/blow the
> > > > > > > bus
> > > > > > > */
> > > > > > > +        QLIST_FOREACH(vbasedev_iter, &group->device_list,
> > > > > > > next) {
> > > > > > > +            if (vbasedev_iter->type !=
> > > > > > > VFIO_DEVICE_TYPE_PCI) {
> > > > > > > +                continue;
> > > > > > > +            }
> > > > > > > +            tmp = container_of(vbasedev_iter,
> > > > > > > VFIOPCIDevice,
> > > > > > > vbasedev);
> > > > > > > +            if (vfio_pci_host_match(&host, &tmp->host)) {
> > > > > > > +                PCIDevice *pci = PCI_DEVICE(tmp);
> > > > > > > +
> > > > > > > +                /*
> > > > > > > +                 * For multifunction device, due to vfio
> > > > > > > driver
> > > > > > > signal all
> > > > > > > +                 * functions under the upstream link of
> > > > > > > the
> > > > > > > end
> > > > > > > point. here
> > > > > > > +                 * we validate all functions whether
> > > > > > > enable
> > > > > > > AER.
> > > > > > > +                 */
> > > > > > > +                if (vfio_pci_host_slot_match(&vdev->host,
> > > > > > > &tmp-
> > > > > > > > host) &&
> > > > > > > +                    !(tmp->features &
> > > > > > > VFIO_FEATURE_ENABLE_AER)) {
> > > > > > > +                    error_report("vfio: Cannot enable AER
> > > > > > > for
> > > > > > > device %s, on same slot"
> > > > > > > +                                 " the dependent device %s
> > > > > > > which
> > > > > > > does not enable AER.",
> > > > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > > > vbasedev.name);
> > > > > > > +                    ret = -1;
> > > > > > > +                    goto out;
> > > > > > > +                }
> > > > > > > +
> > > > > > > +                find.pdev = pci;
> > > > > > > +                find.found = false;
> > > > > > > +                pci_for_each_device(bus, pci_bus_num(bus),
> > > > > > > +                                    device_find, &find);
> > > > > > > +                if (!find.found) {
> > > > > > > +                    error_report("vfio: Cannot enable AER
> > > > > > > for
> > > > > > > device %s, "
> > > > > > > +                                 "the dependent device %s
> > > > > > > is
> > > > > > > not
> > > > > > > under the same bus",
> > > > > > > +                                 vdev->vbasedev.name, tmp-
> > > > > > > > vbasedev.name);
> > > > > > > +                    ret = -1;
> > > > > > > +                    goto out;
> > > > > > > +                }
> > > > > > > +                found = true;
> > > > > > > +                break;
> > > > > > > +            }
> > > > > > > +        }
> > > > > > > +
> > > > > > > +        /* Ensure all affected devices assigned to VM */
> > > > > > 
> > > > > > I am puzzled.
> > > > > > Does not kernel enforce this already?
> > > > > > If not it's a security problem.
> > > > > > If yes why does userspace need to check this?
> > > > > 
> > > > > DMA isolation and bus level isolation are separate concepts.
> > > > >  Each
> > > > > function of a multi-function device can have DMA isolation, but
> > > > > a
> > > > > user
> > > > > needs to own all of the functions affected by a bus reset in
> > > > > order
> > > > > to
> > > > > perform one.  An AER configuration can only be created if the
> > > > > user
> > > > > can
> > > > > translate a guest bus reset into a host bus reset and therefore
> > > > > needs
> > > > > to test whether it has the permissions to do so.  I believe
> > > > > over
> > > > > the
> > > > > course of reviews we've also added some simplifying constraints
> > > > > around
> > > > > this to reduce the problem set, things like all the groups
> > > > > being
> > > > > assigned rather than just owned by the user.  However, I
> > > > > believe
> > > > > the
> > > > > kernel is sound in how it provides security for bus resets.
> > > > >  Thanks,
> > > > > 
> > > > > Alex
> > > > 
> > > > Yes, sounds good.
> > > > 
> > > > So how about just trying to do bus reset at setup time?
> > > > If kernel allows this, we know it is safe ...
> > > 
> > > The host may support hotplug, what's possible at setup time may not
> > > be
> > > possible when an error occurs.
> > 
> > How does this patch help solve this problem?
> 
> I believe there's a patch in this series that re-tests on the
> occurrence of an error, before injecting the AER into the guest.

Doesn't seem robust.  What if hotplug happens right after error is
injected?

> > > It's unlikely, but worth considering I
> > > think.
> > 
> > I suspect vfio will have to solve this in kernel
> > (e.g. automatically add all new devices in the same group
> > wrt reset).
> 
> Nope, the user simply loses their ability to reset the bus if they
> don't own all the groups at the time they attempt to do a bus reset.

Hmm, this is sub-optimal.
Assume I hot-plug a device behind a bus.
I fully intend to pass it through to a VM
where all other devices are but before I
manage to do this, an error triggers.

>  Mixing bus isolation and DMA isolation would cause a mess of groups.

Not sure how what I said implies this.

I merely suggested that if vfio takes over bus reset
it should take over handling hotplug as well,
so devices added on this bus are automatically
prevented from being used by anyone except
the same VM, making it safe to reset them.

-- 
MST

  reply	other threads:[~2015-12-24 19:42 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-11-17  8:41 [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 01/13] vfio: extract vfio_get_hot_reset_info as a single function Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 02/13] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 03/13] pcie: modify the capability size assert Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 04/13] vfio: make the 4 bytes aligned for capability size Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 05/13] vfio: add pcie extanded capability support Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 06/13] aer: impove pcie_aer_init to support vfio device Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 07/13] vfio: add aer support for " Cao jin
2015-11-17  8:41 ` [Qemu-devel] [PATCH v14 08/13] vfio: add check host bus reset is support or not Cao jin
2015-12-17 20:32   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
2015-12-18  1:14     ` Chen Fan
2015-12-24 14:32   ` Michael S. Tsirkin
2015-12-24 17:47     ` Alex Williamson
2015-12-24 18:06       ` Michael S. Tsirkin
2015-12-24 18:20         ` Alex Williamson
2015-12-24 18:23           ` Michael S. Tsirkin
2015-12-24 18:41             ` Alex Williamson
2015-12-24 19:42               ` Michael S. Tsirkin [this message]
2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 09/13] add check reset mechanism when hotplug vfio device Cao jin
2015-12-17 20:32   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 10/13] pci: add pci device pre-post reset callbacks for host bus reset Cao jin
2015-12-17 20:31   ` [Qemu-devel] [PATCH v14 Resend " Alex Williamson
2015-12-18  3:29     ` Chen Fan
2015-12-21 21:07       ` Alex Williamson
2015-12-22  7:18         ` Chen Fan
2015-12-24  5:10         ` Chen Fan
2015-12-24 14:34           ` Michael S. Tsirkin
2015-12-25  1:18             ` Chen Fan
2015-12-23 12:00   ` Michael S. Tsirkin
2015-12-24  5:14     ` Chen Fan
2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 11/13] pcie_aer: expose pcie_aer_msg() interface Cao jin
2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 12/13] vfio-pci: pass the aer error to guest Cao jin
2015-11-17  8:42 ` [Qemu-devel] [PATCH v14 13/13] vfio: add 'aer' property to expose aercap Cao jin
2015-11-18 17:06 ` [Qemu-devel] [PATCH v14 00/13] vfio-pci: pass the aer error to guest Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151224213836-mutt-send-email-mst@redhat.com \
    --to=mst@redhat.com \
    --cc=alex.williamson@redhat.com \
    --cc=caoj.fnst@cn.fujitsu.com \
    --cc=chen.fan.fnst@cn.fujitsu.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).