From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Yang, Sheng" Subject: Re: [PATCH 5/7] KVM/userspace: Device Assignment: Support for assigning PCI devices to guests Date: Thu, 25 Sep 2008 13:20:23 +0800 Message-ID: <200809251320.24375.sheng.yang@intel.com> References: <1222181695-23418-1-git-send-email-amit.shah@redhat.com> <1222181695-23418-6-git-send-email-amit.shah@redhat.com> <200809251254.46990.sheng.yang@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: 7bit Cc: Amit Shah , "avi@redhat.com" , "muli@il.ibm.com" , "anthony@codemonkey.ws" , "benami@il.ibm.com" , "Han, Weidong" , "Kay, Allen M" To: kvm@vger.kernel.org Return-path: Received: from mga01.intel.com ([192.55.52.88]:15181 "EHLO mga01.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751473AbYIYFTs (ORCPT ); Thu, 25 Sep 2008 01:19:48 -0400 In-Reply-To: <200809251254.46990.sheng.yang@intel.com> Content-Disposition: inline Sender: kvm-owner@vger.kernel.org List-ID: On Thursday 25 September 2008 12:54:46 Yang, Sheng wrote: > On Tuesday 23 September 2008 22:54:53 Amit Shah wrote: > > From: Or Sagi > > From: Nir Peleg > > From: Amit Shah > > From: Ben-Ami Yassour > > From: Weidong Han > > From: Glauber de Oliveira Costa > > > > With this patch, we can assign a device on the host machine to a > > guest. > > > > A new command-line option, -pcidevice is added. > > For example, to invoke it for a device sitting at PCI bus:dev.fn > > 04:08.0, use this: > > > > -pcidevice host=04:08.0 > > > > * The host driver for the device, if any, is to be removed before > > assigning the device (else device assignment will fail). > > > > * A device that shares IRQ with another host device cannot currently > > be assigned. > > > > This works only with the in-kernel irqchip method; to use the > > userspace irqchip, a kernel module (irqhook) and some extra changes > > are needed. > > Hi Amit > > I am afraid I got this when try to enable VT-d. 
> > create_userspace_phys_mem: Invalid argument > assigned_dev_iomem_map: Error: create new mapping failed > > Can you have a look at it? (and the patch you sent to Weidong don't got > this problem.) Oh, Weidong's patch "[PATCH] VT-d: Fix iommu map page for mmio pages" fixes it. -- regards Yang, Sheng > > Thanks. > -- > regards > Yang, Sheng > > > Signed-off-by: Amit Shah > > --- > > qemu/Makefile.target | 1 + > > qemu/hw/device-assignment.c | 665 > > +++++++++++++++++++++++++++++++++++++++++++ qemu/hw/device-assignment.h | > > 93 ++++++ > > qemu/hw/pc.c | 9 + > > qemu/hw/pci.c | 7 + > > qemu/vl.c | 18 ++ > > 6 files changed, 793 insertions(+), 0 deletions(-) > > create mode 100644 qemu/hw/device-assignment.c > > create mode 100644 qemu/hw/device-assignment.h > > > > diff --git a/qemu/Makefile.target b/qemu/Makefile.target > > index 72f3db8..40eb273 100644 > > --- a/qemu/Makefile.target > > +++ b/qemu/Makefile.target > > @@ -616,6 +616,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o > > OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o > > OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o > > OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o > > +OBJS+= device-assignment.o > > ifeq ($(USE_KVM_PIT), 1) > > OBJS+= i8254-kvm.o > > endif > > diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c > > new file mode 100644 > > index 0000000..e70daf2 > > --- /dev/null > > +++ b/qemu/hw/device-assignment.c > > @@ -0,0 +1,665 @@ > > +/* > > + * Copyright (c) 2007, Neocleus Corporation. > > + * > > + * This program is free software; you can redistribute it and/or modify > > it + * under the terms and conditions of the GNU General Public License, > > + * version 2, as published by the Free Software Foundation. > > + * > > + * This program is distributed in the hope it will be useful, but > > WITHOUT + * ANY WARRANTY; without even the implied warranty of > > MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU > > General Public License for + * more details. > > + * > > + * You should have received a copy of the GNU General Public License > > along with + * this program; if not, write to the Free Software > > Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 > > USA. > > + * > > + * > > + * Assign a PCI device from the host to a guest VM. > > + * > > + * Adapted for KVM by Qumranet. > > + * > > + * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) > > + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) > > + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) > > + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) > > + */ > > +#include > > +#include > > +#include "qemu-kvm.h" > > +#include > > +#include "device-assignment.h" > > + > > +/* From linux/ioport.h */ > > +#define IORESOURCE_IO 0x00000100 /* Resource type */ > > +#define IORESOURCE_MEM 0x00000200 > > +#define IORESOURCE_IRQ 0x00000400 > > +#define IORESOURCE_DMA 0x00000800 > > +#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ > > + > > +/* #define DEVICE_ASSIGNMENT_DEBUG */ > > + > > +#ifdef DEVICE_ASSIGNMENT_DEBUG > > +#define DEBUG(fmt, args...) fprintf(stderr, "%s: " fmt, __func__ , ## > > args) +#else > > +#define DEBUG(fmt, args...) 
> > +#endif > > + > > +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, > > + uint32_t value) > > +{ > > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > > + uint32_t r_pio = (unsigned long)r_access->r_virtbase > > + + (addr - r_access->e_physbase); > > + > > + if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) { > > + fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x" > > + " r_virtbase=%08lx value=%08x\n", > > + __func__, r_pio, (int)r_access->e_physbase, > > + (unsigned long)r_access->r_virtbase, value); > > + } > > + iopl(3); > > + outb(value, r_pio); > > +} > > + > > +static void assigned_dev_ioport_writew(void *opaque, uint32_t addr, > > + uint32_t value) > > +{ > > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > > + uint32_t r_pio = (unsigned long)r_access->r_virtbase > > + + (addr - r_access->e_physbase); > > + > > + if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) { > > + fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x" > > + " r_virtbase=%08lx value=%08x\n", > > + __func__, r_pio, (int)r_access->e_physbase, > > + (unsigned long)r_access->r_virtbase, value); > > + } > > + iopl(3); > > + outw(value, r_pio); > > +} > > + > > +static void assigned_dev_ioport_writel(void *opaque, uint32_t addr, > > + uint32_t value) > > +{ > > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > > + uint32_t r_pio = (unsigned long)r_access->r_virtbase > > + + (addr - r_access->e_physbase); > > + > > + if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) { > > + fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x" > > + " r_virtbase=%08lx value=%08x\n", > > + __func__, r_pio, (int)r_access->e_physbase, > > + (unsigned long)r_access->r_virtbase, value); > > + } > > + iopl(3); > > + outl(value, r_pio); > > +} > > + > > +static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr) > > +{ > > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > > + uint32_t r_pio = (addr - r_access->e_physbase) > > + + 
(unsigned long)r_access->r_virtbase; > > + uint32_t value; > > + > > + iopl(3); > > + value = inb(r_pio); > > + if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) { > > + fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x " > > + "r_virtbase=%08lx value=%08x\n", > > + __func__, r_pio, (int)r_access->e_physbase, > > + (unsigned long)r_access->r_virtbase, value); > > + } > > + return value; > > +} > > + > > +static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr) > > +{ > > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > > + uint32_t r_pio = (addr - r_access->e_physbase) > > + + (unsigned long)r_access->r_virtbase; > > + uint32_t value; > > + > > + iopl(3); > > + value = inw(r_pio); > > + if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) { > > + fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x " > > + "r_virtbase=%08lx value=%08x\n", > > + __func__, r_pio, (int)r_access->e_physbase, > > + (unsigned long)r_access->r_virtbase, value); > > + } > > + return value; > > +} > > + > > +static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr) > > +{ > > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > > + uint32_t r_pio = (addr - r_access->e_physbase) > > + + (unsigned long)r_access->r_virtbase; > > + uint32_t value; > > + > > + iopl(3); > > + value = inl(r_pio); > > + if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) { > > + fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x " > > + "r_virtbase=%08lx value=%08x\n", > > + __func__, r_pio, (int)r_access->e_physbase, > > + (unsigned long)r_access->r_virtbase, value); > > + } > > + return value; > > +} > > + > > +static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, > > + uint32_t e_phys, uint32_t e_size, int type) > > +{ > > + AssignedDevice *r_dev = (AssignedDevice *) pci_dev; > > + AssignedDevRegion *region = &r_dev->v_addrs[region_num]; > > + int first_map = (region->e_size == 0); > > + int ret = 0; > > + > > + DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x 
region_num=%d \n", > > + e_phys, r_dev->v_addrs[region_num].r_virtbase, type, > > e_size, + region_num); > > + > > + region->e_physbase = e_phys; > > + region->e_size = e_size; > > + > > + /* FIXME: Add support for emulated MMIO for non-kvm guests */ > > + if (kvm_enabled()) { > > + if (!first_map) > > + kvm_destroy_phys_mem(kvm_context, e_phys, > > e_size); + if (e_size > 0) > > + ret = kvm_register_phys_mem(kvm_context, e_phys, > > + region->r_virtbase, > > + e_size, 0); > > + if (ret != 0) > > + fprintf(stderr, > > + "%s: Error: create new mapping failed\n", > > + __func__); > > + } > > +} > > + > > +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num, > > + uint32_t addr, uint32_t size, int > > type) +{ > > + AssignedDevice *r_dev = (AssignedDevice *) pci_dev; > > + > > + r_dev->v_addrs[region_num].e_physbase = addr; > > + DEBUG("%s: address=0x%x type=0x%x len=%d region_num=%d \n", > > + __func__, addr, type, size, region_num); > > + > > + register_ioport_read(addr, size, 1, assigned_dev_ioport_readb, > > + (void *) (r_dev->v_addrs + region_num)); > > + register_ioport_read(addr, size, 2, assigned_dev_ioport_readw, > > + (void *) (r_dev->v_addrs + region_num)); > > + register_ioport_read(addr, size, 4, assigned_dev_ioport_readl, > > + (void *) (r_dev->v_addrs + region_num)); > > + register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb, > > + (void *) (r_dev->v_addrs + region_num)); > > + register_ioport_write(addr, size, 2, assigned_dev_ioport_writew, > > + (void *) (r_dev->v_addrs + region_num)); > > + register_ioport_write(addr, size, 4, assigned_dev_ioport_writel, > > + (void *) (r_dev->v_addrs + region_num)); > > +} > > + > > +static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t > > address, + uint32_t val, int len) > > +{ > > + int fd, r; > > + > > + DEBUG("%s: (%x.%x): address=%04x val=0x%08x len=%d\n", > > + __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), > > + (uint16_t) address, val, len); > > + > > + if 
(address == 0x4) { > > + pci_default_write_config(d, address, val, len); > > + /* Continue to program the card */ > > + } > > + > > + if ((address >= 0x10 && address <= 0x24) || address == 0x34 || > > + address == 0x3c || address == 0x3d) { > > + /* used for update-mappings (BAR emulation) */ > > + pci_default_write_config(d, address, val, len); > > + return; > > + } > > + DEBUG("%s: NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n", > > + __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), > > + (uint16_t) address, val, len); > > + fd = ((AssignedDevice *)d)->real_device.config_fd; > > + r = lseek(fd, address, SEEK_SET); > > + if (r < 0) { > > + fprintf(stderr, "%s: bad seek, errno = %d\n", > > + __func__, errno); > > + return; > > + } > > +again: > > + r = write(fd, &val, len); > > + if (r < 0) { > > + if (errno == EINTR || errno == EAGAIN) > > + goto again; > > + fprintf(stderr, "%s: write failed, errno = %d\n", > > + __func__, errno); > > + } > > +} > > + > > +static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t > > address, + int len) > > +{ > > + uint32_t val = 0; > > + int fd, r; > > + > > + if ((address >= 0x10 && address <= 0x24) || address == 0x34 || > > + address == 0x3c || address == 0x3d) { > > + val = pci_default_read_config(d, address, len); > > + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", > > + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, > > val, + len); > > + return val; > > + } > > + > > + /* vga specific, remove later */ > > + if (address == 0xFC) > > + goto do_log; > > + > > + fd = ((AssignedDevice *)d)->real_device.config_fd; > > + r = lseek(fd, address, SEEK_SET); > > + if (r < 0) { > > + fprintf(stderr, "%s: bad seek, errno = %d\n", > > + __func__, errno); > > + return val; > > + } > > +again: > > + r = read(fd, &val, len); > > + if (r < 0) { > > + if (errno == EINTR || errno == EAGAIN) > > + goto again; > > + fprintf(stderr, "%s: read failed, errno = %d\n", > > + __func__, errno); > > + } > > +do_log: > > + 
DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", > > + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, > > len); + > > + /* kill the special capabilities */ > > + if (address == 4 && len == 4) > > + val &= ~0x100000; > > + else if (address == 6) > > + val &= ~0x10; > > + > > + return val; > > +} > > + > > +static int assigned_dev_register_regions(PCIRegion *io_regions, > > + unsigned long regions_num, > > + AssignedDevice *pci_dev) > > +{ > > + uint32_t i; > > + PCIRegion *cur_region = io_regions; > > + > > + for (i = 0; i < regions_num; i++, cur_region++) { > > + if (!cur_region->valid) > > + continue; > > +#ifdef DEVICE_ASSIGNMENT_DEBUG > > + pci_dev->v_addrs[i].debug |= DEVICE_ASSIGNMENT_DEBUG_MMIO > > + | > > DEVICE_ASSIGNMENT_DEBUG_PIO; +#endif > > + pci_dev->v_addrs[i].num = i; > > + > > + /* handle memory io regions */ > > + if (cur_region->type & IORESOURCE_MEM) { > > + int t = cur_region->type & IORESOURCE_PREFETCH > > + ? PCI_ADDRESS_SPACE_MEM_PREFETCH > > + : PCI_ADDRESS_SPACE_MEM; > > + > > + /* map physical memory */ > > + pci_dev->v_addrs[i].e_physbase = > > cur_region->base_addr; + > > pci_dev->v_addrs[i].r_virtbase = > > + mmap(NULL, > > + (cur_region->size + 0xFFF) & > > 0xFFFFF000, + PROT_WRITE | PROT_READ, > > MAP_SHARED, + cur_region->resource_fd, > > (off_t) 0); + > > + if ((void *) -1 == > > pci_dev->v_addrs[i].r_virtbase) { + > > fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!" 
+ > > "\n", __func__, > > + (uint32_t) > > (cur_region->base_addr)); + return -1; > > + } > > + pci_dev->v_addrs[i].r_size = cur_region->size; > > + pci_dev->v_addrs[i].e_size = 0; > > + > > + /* add offset */ > > + pci_dev->v_addrs[i].r_virtbase += > > + (cur_region->base_addr & 0xFFF); > > + > > + pci_register_io_region((PCIDevice *) pci_dev, i, > > + cur_region->size, t, > > + assigned_dev_iomem_map); > > + continue; > > + } > > + /* handle port io regions */ > > + pci_register_io_region((PCIDevice *) pci_dev, i, > > + cur_region->size, > > PCI_ADDRESS_SPACE_IO, + > > assigned_dev_ioport_map); > > + > > + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; > > + pci_dev->v_addrs[i].r_virtbase = > > + (void *)(long)cur_region->base_addr; > > + /* not relevant for port io */ > > + pci_dev->v_addrs[i].memory_index = 0; > > + } > > + > > + /* success */ > > + return 0; > > +} > > + > > +static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus, > > + uint8_t r_dev, uint8_t r_func) > > +{ > > + char dir[128], name[128], comp[16]; > > + int fd, r = 0; > > + FILE *f; > > + unsigned long long start, end, size, flags; > > + PCIRegion *rp; > > + PCIDevRegions *dev = &pci_dev->real_device; > > + > > + dev->region_number = 0; > > + > > + sprintf(dir, "/sys/bus/pci/devices/0000:%02x:%02x.%x/", > > + r_bus, r_dev, r_func); > > + strcpy(name, dir); > > + strcat(name, "config"); > > + fd = open(name, O_RDWR); > > + if (fd == -1) { > > + fprintf(stderr, "%s: %s: %m\n", __func__, name); > > + return 1; > > + } > > + dev->config_fd = fd; > > +again: > > + r = read(fd, pci_dev->dev.config, sizeof pci_dev->dev.config); > > + if (r < 0) { > > + if (errno == EINTR || errno == EAGAIN) > > + goto again; > > + fprintf(stderr, "%s: read failed, errno = %d\n", > > + __func__, errno); > > + } > > + strcpy(name, dir); > > + strcat(name, "resource"); > > + > > + f = fopen(name, "r"); > > + if (f == NULL) { > > + fprintf(stderr, "%s: %s: %m\n", __func__, name); > > + return 1; > > 
+ } > > + for (r = 0; fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) > > == 3; + r++) { > > + rp = dev->regions + r; > > + rp->valid = 0; > > + size = end - start + 1; > > + flags &= IORESOURCE_IO | IORESOURCE_MEM | > > IORESOURCE_PREFETCH; + if (size == 0 || (flags & > > ~IORESOURCE_PREFETCH) == 0) + continue; > > + if (flags & IORESOURCE_MEM) { > > + flags &= ~IORESOURCE_IO; > > + sprintf(comp, "resource%d", r); > > + strcpy(name, dir); > > + strcat(name, comp); > > + fd = open(name, O_RDWR); > > + if (fd == -1) > > + continue; /* probably ROM > > */ + rp->resource_fd = fd; > > + } else > > + flags &= ~IORESOURCE_PREFETCH; > > + > > + rp->type = flags; > > + rp->valid = 1; > > + rp->base_addr = start; > > + rp->size = size; > > + DEBUG("%s: region %d size %d start 0x%x type %d " > > + "resource_fd %d\n", __func__, r, rp->size, start, > > + rp->type, rp->resource_fd); > > + } > > + fclose(f); > > + > > + dev->region_number = r; > > + return 0; > > +} > > + > > +#define MAX_ASSIGNED_DEVS 4 > > +struct { > > + char name[15]; > > + int bus; > > + int dev; > > + int func; > > + AssignedDevice *assigned_dev; > > +} assigned_devices[MAX_ASSIGNED_DEVS]; > > + > > +int nr_assigned_devices; > > +static int disable_iommu; > > + > > +static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn) > > +{ > > + return (uint32_t)bus << 8 | (uint32_t)devfn; > > +} > > + > > +static AssignedDevice *register_real_device(PCIBus *e_bus, > > + const char *e_dev_name, > > + int e_devfn, uint8_t r_bus, > > + uint8_t r_dev, uint8_t > > r_func) +{ > > + int r; > > + AssignedDevice *pci_dev; > > + uint8_t e_device, e_intx; > > + > > + DEBUG("%s: Registering real physical device %s (devfn=0x%x)\n", > > + __func__, e_dev_name, e_devfn); > > + > > + pci_dev = (AssignedDevice *) > > + pci_register_device(e_bus, e_dev_name, > > sizeof(AssignedDevice), + e_devfn, > > assigned_dev_pci_read_config, + > > assigned_dev_pci_write_config); + if (NULL == pci_dev) { > > + fprintf(stderr, "%s: 
Error: Couldn't register real device > > %s\n", + __func__, e_dev_name); > > + return NULL; > > + } > > + if (get_real_device(pci_dev, r_bus, r_dev, r_func)) { > > + fprintf(stderr, "%s: Error: Couldn't get real device > > (%s)!\n", + __func__, e_dev_name); > > + goto out; > > + } > > + > > + /* handle real device's MMIO/PIO BARs */ > > + if (assigned_dev_register_regions(pci_dev->real_device.regions, > > + > > pci_dev->real_device.region_number, + > > pci_dev)) > > + goto out; > > + > > + /* handle interrupt routing */ > > + e_device = (pci_dev->dev.devfn >> 3) & 0x1f; > > + e_intx = pci_dev->dev.config[0x3d] - 1; > > + pci_dev->intpin = e_intx; > > + pci_dev->run = 0; > > + pci_dev->girq = 0; > > + pci_dev->h_busnr = r_bus; > > + pci_dev->h_devfn = PCI_DEVFN(r_dev, r_func); > > + > > +#ifdef KVM_CAP_DEVICE_ASSIGNMENT > > + if (kvm_enabled()) { > > + struct kvm_assigned_pci_dev assigned_dev_data; > > + > > + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); > > + assigned_dev_data.assigned_dev_id = > > + calc_assigned_dev_id(pci_dev->h_busnr, > > + (uint32_t)pci_dev->h_devfn); > > + assigned_dev_data.busnr = pci_dev->h_busnr; > > + assigned_dev_data.devfn = pci_dev->h_devfn; > > + > > +#ifdef KVM_CAP_IOMMU > > + /* We always enable the IOMMU if present > > + * (or when not disabled on the command line) > > + */ > > + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU); > > + if (r && !disable_iommu) > > + assigned_dev_data.flags |= > > KVM_DEV_ASSIGN_ENABLE_IOMMU; +#endif > > + r = kvm_assign_pci_device(kvm_context, > > &assigned_dev_data); + if (r < 0) { > > + fprintf(stderr, "Could not notify kernel about " > > + "assigned device \"%s\"\n", e_dev_name); > > + perror("pt-ioctl"); > > + goto out; > > + } > > + } > > +#endif > > + fprintf(stderr, "Registered host PCI device %02x:%02x.%1x " > > + "(\"%s\") as guest device %02x:%02x.%1x\n", > > + r_bus, r_dev, r_func, e_dev_name, > > + pci_bus_num(e_bus), e_device, r_func); > > + > > + return pci_dev; > > 
+out: > > + pci_unregister_device(&pci_dev->dev); > > + return NULL; > > +} > > + > > +extern int get_param_value(char *buf, int buf_size, > > + const char *tag, const char *str); > > +extern int piix_get_irq(int); > > + > > +#ifdef KVM_CAP_DEVICE_ASSIGNMENT > > +/* The pci config space got updated. Check if irq numbers have changed > > + * for our devices > > + */ > > +void assigned_dev_update_irq(PCIDevice *d) > > +{ > > + int i, irq, r; > > + AssignedDevice *assigned_dev; > > + > > + for (i = 0; i < nr_assigned_devices; i++) { > > + assigned_dev = assigned_devices[i].assigned_dev; > > + if (assigned_dev == NULL) > > + continue; > > + > > + irq = pci_map_irq(&assigned_dev->dev, > > assigned_dev->intpin); + irq = piix_get_irq(irq); > > + > > + if (irq != assigned_dev->girq) { > > + struct kvm_assigned_irq assigned_irq_data; > > + > > + memset(&assigned_irq_data, 0, sizeof > > assigned_irq_data); + > > assigned_irq_data.assigned_dev_id = > > + > > calc_assigned_dev_id(assigned_dev->h_busnr, + > > (uint8_t) > > + > > assigned_dev->h_devfn); + > > assigned_irq_data.guest_irq = irq; > > + assigned_irq_data.host_irq = > > + assigned_dev->real_device.irq; > > + r = kvm_assign_irq(kvm_context, > > &assigned_irq_data); + if (r < 0) { > > + perror("assigned_dev_update_irq"); > > + fprintf(stderr, "Are you assigning a > > device " + "that shares IRQ with > > some other " + "device?\n"); > > + > > pci_unregister_device(&assigned_dev->dev); + > > continue; > > + } > > + assigned_dev->girq = irq; > > + } > > + } > > +} > > +#endif > > + > > +static int init_device_assignment(void) > > +{ > > + /* Do we have any devices to be assigned? 
*/ > > + if (nr_assigned_devices == 0) > > + return -1; > > + iopl(3); > > + return 0; > > +} > > + > > +struct PCIDevice *init_assigned_device(PCIBus *bus, int *index) > > +{ > > + AssignedDevice *dev = NULL; > > + int i; > > + > > + if (*index == -1) { > > + if (init_device_assignment() < 0) > > + return NULL; > > + > > + *index = nr_assigned_devices - 1; > > + } > > + i = *index; > > + dev = register_real_device(bus, assigned_devices[i].name, -1, > > + assigned_devices[i].bus, > > + assigned_devices[i].dev, > > + assigned_devices[i].func); > > + if (dev == NULL) { > > + fprintf(stderr, "Error: Couldn't register device > > \"%s\"\n", + assigned_devices[i].name); > > + } > > + assigned_devices[i].assigned_dev = dev; > > + > > + --*index; > > + return &dev->dev; > > +} > > + > > +/* > > + * Syntax to assign device: > > + * > > + * -pcidevice dev=bus:dev.func,dma=dma > > + * > > + * Example: > > + * -pcidevice host=00:13.0,dma=pvdma > > + * > > + * dma can currently only be 'none' to disable iommu support. 
> > + */ > > +void add_assigned_device(const char *arg) > > +{ > > + char *cp, *cp1; > > + char device[8]; > > + char dma[6]; > > + int r; > > + > > + if (nr_assigned_devices >= MAX_ASSIGNED_DEVS) { > > + fprintf(stderr, "Too many assigned devices (max %d)\n", > > + MAX_ASSIGNED_DEVS); > > + return; > > + } > > + memset(&assigned_devices[nr_assigned_devices], 0, > > + sizeof assigned_devices[nr_assigned_devices]); > > + > > + r = get_param_value(device, sizeof device, "host", arg); > > + > > + r = get_param_value(assigned_devices[nr_assigned_devices].name, > > + sizeof > > assigned_devices[nr_assigned_devices].name, + > > "name", arg); > > + if (!r) > > + strncpy(assigned_devices[nr_assigned_devices].name, > > device, 8); + > > +#ifdef KVM_CAP_IOMMU > > + r = get_param_value(dma, sizeof dma, "dma", arg); > > + if (r && !strncmp(dma, "none", 4)) > > + disable_iommu = 1; > > +#endif > > + cp = device; > > + assigned_devices[nr_assigned_devices].bus = strtoul(cp, &cp1, > > 16); + if (*cp1 != ':') > > + goto bad; > > + cp = cp1 + 1; > > + > > + assigned_devices[nr_assigned_devices].dev = strtoul(cp, &cp1, > > 16); + if (*cp1 != '.') > > + goto bad; > > + cp = cp1 + 1; > > + > > + assigned_devices[nr_assigned_devices].func = strtoul(cp, &cp1, > > 16); + > > + nr_assigned_devices++; > > + return; > > +bad: > > + fprintf(stderr, "pcidevice argument parse error; " > > + "please check the help text for usage\n"); > > +} > > diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h > > new file mode 100644 > > index 0000000..b77e484 > > --- /dev/null > > +++ b/qemu/hw/device-assignment.h > > @@ -0,0 +1,93 @@ > > +/* > > + * Copyright (c) 2007, Neocleus Corporation. > > + * Copyright (c) 2007, Intel Corporation. > > + * > > + * This program is free software; you can redistribute it and/or modify > > it + * under the terms and conditions of the GNU General Public License, > > + * version 2, as published by the Free Software Foundation. 
> > + * > > + * This program is distributed in the hope it will be useful, but > > WITHOUT + * ANY WARRANTY; without even the implied warranty of > > MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU > > General Public License for + * more details. > > + * > > + * You should have received a copy of the GNU General Public License > > along with + * this program; if not, write to the Free Software > > Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 > > USA. > > + * > > + * Data structures for storing PCI state > > + * > > + * Adapted to kvm by Qumranet > > + * > > + * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) > > + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) > > + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) > > + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) > > + */ > > + > > +#ifndef __DEVICE_ASSIGNMENT_H__ > > +#define __DEVICE_ASSIGNMENT_H__ > > + > > +#include > > +#include "qemu-common.h" > > +#include "pci.h" > > +#include > > + > > +#define DEVICE_ASSIGNMENT_DEBUG_PIO (0x01) > > +#define DEVICE_ASSIGNMENT_DEBUG_MMIO (0x02) > > + > > +/* From include/linux/pci.h in the kernel sources */ > > +#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & > > 0x07)) + > > +typedef uint32_t pciaddr_t; > > + > > +#define MAX_IO_REGIONS (6) > > + > > +typedef struct pci_region_s { > > + int type; /* Memory or port I/O */ > > + int valid; > > + pciaddr_t base_addr; > > + pciaddr_t size; /* size of the region */ > > + int resource_fd; > > +} PCIRegion; > > + > > +typedef struct pci_dev_s { > > + uint8_t bus, dev, func; /* Bus inside domain, device and function > > */ + int irq; /* IRQ number */ > > + uint16_t region_number; /* number of active regions */ > > + > > + /* Port I/O or MMIO Regions */ > > + PCIRegion regions[MAX_IO_REGIONS]; > > + int config_fd; > > +} PCIDevRegions; > > + > > +typedef struct assigned_dev_region_s { > > + 
target_phys_addr_t e_physbase; > > + uint32_t memory_index; > > + void *r_virtbase; /* mmapped access address */ > > + int num; /* our index within v_addrs[] */ > > + uint32_t e_size; /* emulated size of region in bytes */ > > + uint32_t r_size; /* real size of region in bytes */ > > + uint32_t debug; > > +} AssignedDevRegion; > > + > > +typedef struct assigned_dev_s { > > + PCIDevice dev; > > + int intpin; > > + uint8_t debug_flags; > > + AssignedDevRegion v_addrs[PCI_NUM_REGIONS]; > > + PCIDevRegions real_device; > > + int run; > > + int girq; > > + unsigned char h_busnr; > > + unsigned int h_devfn; > > + int bound; > > +} AssignedDevice; > > + > > +/* Initialization functions */ > > +PCIDevice *init_assigned_device(PCIBus *bus, int *index); > > +void add_assigned_device(const char *arg); > > +void assigned_dev_set_vector(int irq, int vector); > > +void assigned_dev_ack_mirq(int vector); > > + > > +#endif /* __DEVICE_ASSIGNMENT_H__ */ > > diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c > > index 6053103..4a611cc 100644 > > --- a/qemu/hw/pc.c > > +++ b/qemu/hw/pc.c > > @@ -32,6 +32,7 @@ > > #include "smbus.h" > > #include "boards.h" > > #include "console.h" > > +#include "device-assignment.h" > > > > #include "qemu-kvm.h" > > > > @@ -1006,6 +1007,14 @@ static void pc_init1(ram_addr_t ram_size, int > > vga_ram_size, } > > } > > > > + /* Initialize assigned devices */ > > + if (pci_enabled) { > > + int r = -1; > > + do { > > + init_assigned_device(pci_bus, &r); > > + } while (r >= 0); > > + } > > + > > rtc_state = rtc_init(0x70, i8259[8]); > > > > qemu_register_boot_set(pc_boot_set, rtc_state); > > diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c > > index 61ff0f6..e4e8386 100644 > > --- a/qemu/hw/pci.c > > +++ b/qemu/hw/pci.c > > @@ -50,6 +50,7 @@ struct PCIBus { > > > > static void pci_update_mappings(PCIDevice *d); > > static void pci_set_irq(void *opaque, int irq_num, int level); > > +void assigned_dev_update_irq(PCIDevice *d); > > > > target_phys_addr_t pci_mem_base; > > 
static int pci_irq_index; > > @@ -453,6 +454,12 @@ void pci_default_write_config(PCIDevice *d, > > val >>= 8; > > } > > > > +#ifdef KVM_CAP_DEVICE_ASSIGNMENT > > + if (kvm_enabled() && qemu_kvm_irqchip_in_kernel() && > > + address >= 0x60 && address <= 0x63) > > + assigned_dev_update_irq(d); > > +#endif > > + > > end = address + len; > > if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) { > > /* if the command register is modified, we must modify the > > mappings */ diff --git a/qemu/vl.c b/qemu/vl.c > > index 2fb8552..83f28c5 100644 > > --- a/qemu/vl.c > > +++ b/qemu/vl.c > > @@ -37,6 +37,7 @@ > > #include "qemu-char.h" > > #include "block.h" > > #include "audio/audio.h" > > +#include "hw/device-assignment.h" > > #include "migration.h" > > #include "balloon.h" > > #include "qemu-kvm.h" > > @@ -8469,6 +8470,12 @@ static void help(int exitcode) > > #endif > > "-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n" > > "-no-kvm-pit disable KVM kernel mode PIT\n" > > +#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__) > > + "-pcidevice host=bus:dev.func[,dma=none][,name=\"string\"]\n" > > + " expose a PCI device to the guest OS.\n" > > + " dma=none: don't perform any dma translations > > (default is to use an iommu)\n" + " 'string' is > > used in log output.\n" > > +#endif > > #endif > > #ifdef TARGET_I386 > > "-std-vga simulate a standard VGA card with VESA Bochs > > Extensions\n" @@ -8592,6 +8599,9 @@ enum { > > QEMU_OPTION_no_kvm, > > QEMU_OPTION_no_kvm_irqchip, > > QEMU_OPTION_no_kvm_pit, > > +#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__) > > + QEMU_OPTION_pcidevice, > > +#endif > > QEMU_OPTION_no_reboot, > > QEMU_OPTION_no_shutdown, > > QEMU_OPTION_show_cursor, > > @@ -8680,6 +8690,9 @@ const QEMUOption qemu_options[] = { > > #endif > > { "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip }, > > { "no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit }, > > +#if defined(TARGET_I386) || defined(TARGET_X86_64) || 
defined(__linux__) > > + { "pcidevice", HAS_ARG, QEMU_OPTION_pcidevice }, > > +#endif > > #endif > > #if defined(TARGET_PPC) || defined(TARGET_SPARC) > > { "g", 1, QEMU_OPTION_g }, > > @@ -9586,6 +9599,11 @@ int main(int argc, char **argv) > > kvm_pit = 0; > > break; > > } > > +#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__) > > + case QEMU_OPTION_pcidevice: > > + add_assigned_device(optarg); > > + break; > > +#endif > > #endif > > case QEMU_OPTION_usb: > > usb_enabled = 1; > > -- > > 1.5.4.3 > > > > -- > > To unsubscribe from this list: send the line "unsubscribe kvm" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html