From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([208.118.235.92]:35943) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1SuSZ0-0003cc-D4 for qemu-devel@nongnu.org; Thu, 26 Jul 2012 14:13:31 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1SuSYy-0005RK-Po for qemu-devel@nongnu.org; Thu, 26 Jul 2012 14:13:30 -0400 Received: from mx1.redhat.com ([209.132.183.28]:50415) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1SuSYy-0005RF-Eq for qemu-devel@nongnu.org; Thu, 26 Jul 2012 14:13:28 -0400 Message-ID: <5011717F.6090600@redhat.com> Date: Thu, 26 Jul 2012 19:34:07 +0300 From: Avi Kivity MIME-Version: 1.0 References: <20120725165948.17260.82862.stgit@bling.home> In-Reply-To: <20120725165948.17260.82862.stgit@bling.home> Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit Subject: Re: [Qemu-devel] [RFC PATCH] vfio: VFIO PCI driver for Qemu List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Alex Williamson Cc: aik@ozlabs.ru, qemu-devel@nongnu.org, kvm@vger.kernel.org On 07/25/2012 08:03 PM, Alex Williamson wrote: > +/* > + * Resource setup > + */ > +static void vfio_unmap_bar(VFIODevice *vdev, int nr) > +{ > + VFIOBAR *bar = &vdev->bars[nr]; > + uint64_t size; > + > + if (!memory_region_size(&bar->mem)) { > + return; > + } > + > + size = memory_region_size(&bar->mmap_mem); > + if (size) { > + memory_region_del_subregion(&bar->mem, &bar->mmap_mem); > + munmap(bar->mmap, size); > + } > + > + if (vdev->msix && vdev->msix->table_bar == nr) { > + size = memory_region_size(&vdev->msix->mmap_mem); > + if (size) { > + memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem); > + munmap(vdev->msix->mmap, size); > + } > + } Are the three size checks needed? Everything should work without them from the memory core point of view. 
> + > + memory_region_destroy(&bar->mem); > +} > + > +static int vfio_mmap_bar(VFIOBAR *bar, MemoryRegion *mem, MemoryRegion *submem, > + void **map, size_t size, off_t offset, > + const char *name) > +{ > + *map = mmap(NULL, size, PROT_READ | PROT_WRITE, > + MAP_SHARED, bar->fd, bar->fd_offset + offset); > + if (*map == MAP_FAILED) { > + *map = NULL; > + return -1; > + } > + > + memory_region_init_ram_ptr(submem, name, size, *map); > + memory_region_add_subregion(mem, offset, submem); > + > + return 0; > +} > + > +static void vfio_map_bar(VFIODevice *vdev, int nr, uint8_t type) > +{ > + VFIOBAR *bar = &vdev->bars[nr]; > + unsigned size = bar->size; > + char name[64]; > + > + sprintf(name, "VFIO %04x:%02x:%02x.%x BAR %d", vdev->host.domain, > + vdev->host.bus, vdev->host.slot, vdev->host.function, nr); > + > + /* A "slow" read/write mapping underlies all BARs */ > + memory_region_init_io(&bar->mem, &vfio_bar_ops, bar, name, size); > + pci_register_bar(&vdev->pdev, nr, type, &bar->mem); So far all container BARs have been pure containers, without RAM or I/O callbacks. It should all work, but this sets precedent and requires it to work. I guess there's no problem supporting it though. > + > + if (type & PCI_BASE_ADDRESS_SPACE_IO) { > + return; /* IO space is only slow, don't expect high perf here */ > + } What about non-x86 where IO is actually memory? I think you can drop this and let the address space filtering in the listener drop it if it turns out to be in IO space. > + > + if (size & ~TARGET_PAGE_MASK) { > + error_report("%s is too small to mmap, this may affect performance.\n", > + name); > + return; > + } We can work a little harder and align the host space offset with the guest space offset, and map it in. > + > + /* > + * We can't mmap areas overlapping the MSIX vector table, so we > + * potentially insert a direct-mapped subregion before and after it. > + */ This splitting is what the memory core really enjoys. 
You can just place the MSIX page over the RAM page and let it do the cut-n-paste. > + if (vdev->msix && vdev->msix->table_bar == nr) { > + size = vdev->msix->table_offset & TARGET_PAGE_MASK; > + } > + > + if (size) { > + strcat(name, " mmap"); > + if (vfio_mmap_bar(bar, &bar->mem, &bar->mmap_mem, &bar->mmap, > + size, 0, name)) { > + error_report("%s Failed. Performance may be slow\n", name); > + } > + } > + > + if (vdev->msix && vdev->msix->table_bar == nr) { > + unsigned start; > + > + start = TARGET_PAGE_ALIGN(vdev->msix->table_offset + > + (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE)); > + > + if (start < bar->size) { > + size = bar->size - start; > + strcat(name, " msix-hi"); > + /* MSIXInfo contains another MemoryRegion for this mapping */ > + if (vfio_mmap_bar(bar, &bar->mem, &vdev->msix->mmap_mem, > + &vdev->msix->mmap, size, start, name)) { > + error_report("%s Failed. Performance may be slow\n", name); > + } > + } > + } > + > + return; > +} > + > + > +static int __vfio_get_device(VFIOGroup *group, > + const char *name, VFIODevice *vdev) __foo is a reserved symbol. > +{ > + int ret; > + > + ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); > + if (ret < 0) { > + error_report("vfio: error getting device %s from group %d: %s", > + name, group->groupid, strerror(errno)); > + error_report("Verify all devices in group %d " > + "are bound to vfio-pci or pci-stub and not already in use", > + group->groupid); > + return -1; > + } > + > + vdev->group = group; > + QLIST_INSERT_HEAD(&group->device_list, vdev, next); > + > + vdev->fd = ret; > + > + return 0; > +} > + > + > +static Property vfio_pci_dev_properties[] = { > + DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host), > + //TODO - support passed fds... is this necessary? Yes. 
> + //DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name), > + //DEFINE_PROP_STRING("vfiogroupfd, VFIODevice, vfiogroupfd_name), > + DEFINE_PROP_END_OF_LIST(), > +}; > + > + > + > +typedef struct MSIVector { > + EventNotifier interrupt; /* eventfd triggered on interrupt */ > + struct VFIODevice *vdev; /* back pointer to device */ > + int vector; /* the vector number for this element */ > + int virq; /* KVM irqchip route for Qemu bypass */ This calls for an abstraction (don't we have a cache where we look those up?) > + bool use; > +} MSIVector; > + > + > +typedef struct VFIOContainer { > + int fd; /* /dev/vfio/vfio, empowered by the attached groups */ > + struct { > + /* enable abstraction to support various iommu backends */ > + union { > + MemoryListener listener; /* Used by type1 iommu */ > + }; The usual way is to have a Type1VFIOContainer deriving from VFIOContainer and adding a MemoryListener. > + void (*release)(struct VFIOContainer *); > + } iommu_data; > + QLIST_HEAD(, VFIOGroup) group_list; > + QLIST_ENTRY(VFIOContainer) next; > +} VFIOContainer; > + > +#endif /* __VFIO_H__ */ > diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h > index 5a9d4e3..bd1a76c 100644 > --- a/linux-headers/linux/kvm.h > +++ b/linux-headers/linux/kvm.h Separate patch when leaving RFC mode. > diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h > new file mode 100644 > index 0000000..0a4f180 > --- /dev/null > +++ b/linux-headers/linux/vfio.h > @@ -0,0 +1,445 @@ > +/* > + * VFIO API definition > + * > + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. > + * Author: Alex Williamson > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. 
> + */ > +#ifndef VFIO_H > +#define VFIO_H > + > +#include > +#include > + > +#define VFIO_API_VERSION 0 > + > +#ifdef __KERNEL__ /* Internal VFIO-core/bus driver API */ Use the exported file; that gets rid of the __KERNEL__ bits. -- error compiling committee.c: too many arguments to function