From: Avi Kivity <avi@redhat.com>
To: Alex Williamson <alex.williamson@redhat.com>
Cc: aik@ozlabs.ru, qemu-devel@nongnu.org, kvm@vger.kernel.org
Subject: Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu
Date: Thu, 26 Jul 2012 19:34:07 +0300
Message-ID: <5011717F.6090600@redhat.com>
In-Reply-To: <20120725165948.17260.82862.stgit@bling.home>

On 07/25/2012 08:03 PM, Alex Williamson wrote:

> +/*
> + * Resource setup
> + */
> +static void vfio_unmap_bar(VFIODevice *vdev, int nr)
> +{
> +    VFIOBAR *bar = &vdev->bars[nr];
> +    uint64_t size;
> +
> +    if (!memory_region_size(&bar->mem)) {
> +        return;
> +    }
> +
> +    size = memory_region_size(&bar->mmap_mem);
> +    if (size) {
> +         memory_region_del_subregion(&bar->mem, &bar->mmap_mem);
> +         munmap(bar->mmap, size);
> +    }
> +
> +    if (vdev->msix && vdev->msix->table_bar == nr) {
> +        size = memory_region_size(&vdev->msix->mmap_mem);
> +        if (size) {
> +            memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem);
> +            munmap(vdev->msix->mmap, size);
> +        }
> +    }

Are the three size checks needed? Everything should work without them
from the memory core point of view.
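
Something like this should do (untested sketch; it assumes every BAR's
regions are always initialized, even at size zero, so teardown can be
unconditional, and that a failed zero-length munmap() is harmless since
the return value is ignored):

static void vfio_unmap_bar(VFIODevice *vdev, int nr)
{
    VFIOBAR *bar = &vdev->bars[nr];

    /* Unconditionally tear down the mmap subregion. */
    memory_region_del_subregion(&bar->mem, &bar->mmap_mem);
    munmap(bar->mmap, memory_region_size(&bar->mmap_mem));

    if (vdev->msix && vdev->msix->table_bar == nr) {
        memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem);
        munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
    }

    memory_region_destroy(&bar->mem);
}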

> +
> +    memory_region_destroy(&bar->mem);
> +}
> +
> +static int vfio_mmap_bar(VFIOBAR *bar, MemoryRegion *mem, MemoryRegion *submem,
> +                         void **map, size_t size, off_t offset,
> +                         const char *name)
> +{
> +    *map = mmap(NULL, size, PROT_READ | PROT_WRITE,
> +                MAP_SHARED, bar->fd, bar->fd_offset + offset);
> +    if (*map == MAP_FAILED) {
> +        *map = NULL;
> +        return -1;
> +    }
> +
> +    memory_region_init_ram_ptr(submem, name, size, *map);
> +    memory_region_add_subregion(mem, offset, submem);
> +
> +    return 0;
> +}
> +
> +static void vfio_map_bar(VFIODevice *vdev, int nr, uint8_t type)
> +{
> +    VFIOBAR *bar = &vdev->bars[nr];
> +    unsigned size = bar->size;
> +    char name[64];
> +
> +    sprintf(name, "VFIO %04x:%02x:%02x.%x BAR %d", vdev->host.domain,
> +            vdev->host.bus, vdev->host.slot, vdev->host.function, nr);
> +
> +    /* A "slow" read/write mapping underlies all BARs */
> +    memory_region_init_io(&bar->mem, &vfio_bar_ops, bar, name, size);
> +    pci_register_bar(&vdev->pdev, nr, type, &bar->mem);

So far all container BARs have been pure containers, without RAM or I/O
callbacks.  It should all work, but this sets precedent and requires it
to work.  I guess there's no problem supporting it though.

> +
> +    if (type & PCI_BASE_ADDRESS_SPACE_IO) {
> +        return; /* IO space is only slow, don't expect high perf here */
> +    }

What about non-x86 where IO is actually memory?  I think you can drop
this and let the address space filtering in the listener drop it if it
turns out to be in IO space.

> +
> +    if (size & ~TARGET_PAGE_MASK) {
> +        error_report("%s is too small to mmap, this may affect performance.\n",
> +                     name);
> +        return;
> +    }

We can work a little harder and align the host space offset with the
guest space offset, and map it in.
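
Rough sketch of what I mean (untested; assumes the kernel lets us round
the file offset down to a page boundary and that the rest of that host
page is safe to expose, the point being that the BAR ends up at the same
offset within the host page as within the guest page, so it can still
be mapped):

    off_t aligned = (bar->fd_offset + offset) & TARGET_PAGE_MASK;
    off_t delta = (bar->fd_offset + offset) - aligned;

    /* Map the whole host page containing the BAR... */
    void *p = mmap(NULL, TARGET_PAGE_SIZE, PROT_READ | PROT_WRITE,
                   MAP_SHARED, bar->fd, aligned);
    if (p != MAP_FAILED) {
        /* ...but expose only the BAR-sized window to the guest. */
        memory_region_init_ram_ptr(submem, name, size, (char *)p + delta);
        memory_region_add_subregion(mem, offset, submem);
    }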

> +
> +    /*
> +     * We can't mmap areas overlapping the MSIX vector table, so we
> +     * potentially insert a direct-mapped subregion before and after it.
> +     */

This splitting is what the memory core really enjoys.  You can just
place the MSIX page over the RAM page and let it do the cut-n-paste.
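
I.e., something like (sketch; table_mem here is a hypothetical io region
covering just the table page, and this assumes the whole BAR can be
mmapped, with accesses to the table page still trapping via the
higher-priority overlap):

    /* Map the entire BAR as RAM... */
    memory_region_init_ram_ptr(&bar->mmap_mem, name, bar->size, bar->mmap);
    memory_region_add_subregion(&bar->mem, 0, &bar->mmap_mem);

    /* ...then punch the MSI-X table through it; the memory core clips
     * the RAM mapping around the overlap by itself. */
    memory_region_add_subregion_overlap(&bar->mem, vdev->msix->table_offset,
                                        &vdev->msix->table_mem, 1);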

> +    if (vdev->msix && vdev->msix->table_bar == nr) {
> +        size = vdev->msix->table_offset & TARGET_PAGE_MASK;
> +    }
> +
> +    if (size) {
> +        strcat(name, " mmap");
> +        if (vfio_mmap_bar(bar, &bar->mem, &bar->mmap_mem, &bar->mmap,
> +                          size, 0, name)) {
> +            error_report("%s Failed. Performance may be slow\n", name);
> +        }
> +    }
> +
> +    if (vdev->msix && vdev->msix->table_bar == nr) {
> +        unsigned start;
> +
> +        start = TARGET_PAGE_ALIGN(vdev->msix->table_offset +
> +                                  (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
> +
> +        if (start < bar->size) {
> +            size = bar->size - start;
> +            strcat(name, " msix-hi");
> +            /* MSIXInfo contains another MemoryRegion for this mapping */
> +            if (vfio_mmap_bar(bar, &bar->mem, &vdev->msix->mmap_mem,
> +                              &vdev->msix->mmap, size, start, name)) {
> +                error_report("%s Failed. Performance may be slow\n", name);
> +            }
> +        }
> +    }
> +
> +    return;
> +}
> +
> +
> +static int __vfio_get_device(VFIOGroup *group,
> +                             const char *name, VFIODevice *vdev)

__foo is a reserved symbol; identifiers with a leading double underscore
belong to the compiler and the C library. Drop the prefix.

> +{
> +    int ret;
> +
> +    ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
> +    if (ret < 0) {
> +        error_report("vfio: error getting device %s from group %d: %s",
> +                     name, group->groupid, strerror(errno));
> +        error_report("Verify all devices in group %d "
> +                     "are bound to vfio-pci or pci-stub and not already in use",
> +                     group->groupid);
> +        return -1;
> +    }
> +
> +    vdev->group = group;
> +    QLIST_INSERT_HEAD(&group->device_list, vdev, next);
> +
> +    vdev->fd = ret;
> +
> +    return 0;
> +}
> +
> +
> +static Property vfio_pci_dev_properties[] = {
> +    DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host),
> +    //TODO - support passed fds... is this necessary?

Yes.

> +    //DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name),
> +    //DEFINE_PROP_STRING("vfiogroupfd", VFIODevice, vfiogroupfd_name),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +
> +
> +typedef struct MSIVector {
> +    EventNotifier interrupt; /* eventfd triggered on interrupt */
> +    struct VFIODevice *vdev; /* back pointer to device */
> +    int vector; /* the vector number for this element */
> +    int virq; /* KVM irqchip route for Qemu bypass */

This calls for an abstraction (don't we have a cache where we look those
up?)
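
Something along the lines of a route cache keyed on the MSI message,
say (hypothetical sketch on top of the existing
kvm_irqchip_add_msi_route()/kvm_irqchip_release_virq() calls):

typedef struct MSIRouteCache {
    MSIMessage msg; /* last address/data pair we programmed */
    int virq;       /* -1 when no route is allocated */
} MSIRouteCache;

static int msi_route_lookup(MSIRouteCache *cache, MSIMessage msg)
{
    if (cache->virq >= 0 &&
        cache->msg.address == msg.address && cache->msg.data == msg.data) {
        return cache->virq; /* unchanged message, reuse the route */
    }
    if (cache->virq >= 0) {
        kvm_irqchip_release_virq(kvm_state, cache->virq);
    }
    cache->msg = msg;
    cache->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
    return cache->virq;
}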

> +    bool use;
> +} MSIVector;
> +
> +
> +typedef struct VFIOContainer {
> +    int fd; /* /dev/vfio/vfio, empowered by the attached groups */
> +    struct {
> +        /* enable abstraction to support various iommu backends */
> +        union {
> +            MemoryListener listener; /* Used by type1 iommu */
> +        };

The usual way is to have a Type1VFIOContainer deriving from
VFIOContainer and adding a MemoryListener.
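
Roughly (sketch; the type1 listener callbacks then recover their
container with container_of()):

typedef struct VFIOContainer {
    int fd; /* /dev/vfio/vfio, empowered by the attached groups */
    void (*release)(struct VFIOContainer *);
    QLIST_HEAD(, VFIOGroup) group_list;
    QLIST_ENTRY(VFIOContainer) next;
} VFIOContainer;

typedef struct VFIOType1Container {
    VFIOContainer container;
    MemoryListener listener; /* used by the type1 iommu only */
} VFIOType1Container;

static void vfio_type1_region_add(MemoryListener *listener,
                                  MemoryRegionSection *section)
{
    VFIOType1Container *c = container_of(listener, VFIOType1Container,
                                         listener);
    /* ... map the section through c->container.fd ... */
}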

> +        void (*release)(struct VFIOContainer *);
> +    } iommu_data;
> +    QLIST_HEAD(, VFIOGroup) group_list;
> +    QLIST_ENTRY(VFIOContainer) next;
> +} VFIOContainer;
> +

> +#endif /* __VFIO_H__ */
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index 5a9d4e3..bd1a76c 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h

Separate patch when leaving RFC mode.

> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> new file mode 100644
> index 0000000..0a4f180
> --- /dev/null
> +++ b/linux-headers/linux/vfio.h
> @@ -0,0 +1,445 @@
> +/*
> + * VFIO API definition
> + *
> + * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
> + *     Author: Alex Williamson <alex.williamson@redhat.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +#ifndef VFIO_H
> +#define VFIO_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +#define VFIO_API_VERSION	0
> +
> +#ifdef __KERNEL__	/* Internal VFIO-core/bus driver API */

Use the exported file; that gets rid of the __KERNEL__ bits.
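
(QEMU already has scripts/update-linux-headers.sh for exactly this,
assuming vfio.h gets added to its file list; something like

  ./scripts/update-linux-headers.sh /path/to/linux

regenerates linux-headers/ from a kernel tree's "make headers_install"
output.)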

-- 
error compiling committee.c: too many arguments to function
