From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Zhai, Edwin" Subject: [PATCH] enable PCI multiple-segments for pass-through device Date: Fri, 29 Jan 2010 14:38:44 +0800 Message-ID: <4B628274.5020005@intel.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------060704070102030209030908" Cc: "kvm@vger.kernel.org" , "Zhai, Edwin" To: Marcelo Tosatti Return-path: Received: from mga09.intel.com ([134.134.136.24]:32646 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751062Ab0A2Gio (ORCPT ); Fri, 29 Jan 2010 01:38:44 -0500 Sender: kvm-owner@vger.kernel.org List-ID: This is a multi-part message in MIME format. --------------060704070102030209030908 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit These 2 patches add an optional parameter (default 0), the PCI segment (or domain), alongside the BDF when assigning a PCI device to a guest. Signed-off-by: Zhai Edwin -- best rgds, edwin --------------060704070102030209030908 Content-Type: text/plain; name="kvm_vtd_multi_seg_kmod.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="kvm_vtd_multi_seg_kmod.patch" Index: linux-2.6/virt/kvm/assigned-dev.c =================================================================== --- linux-2.6.orig/virt/kvm/assigned-dev.c +++ linux-2.6/virt/kvm/assigned-dev.c @@ -526,7 +526,8 @@ static int kvm_vm_ioctl_assign_device(st r = -ENOMEM; goto out; } - dev = pci_get_bus_and_slot(assigned_dev->busnr, + dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, + assigned_dev->busnr, assigned_dev->devfn); if (!dev) { printk(KERN_INFO "%s: host device not found\n", __func__); @@ -548,6 +549,7 @@ static int kvm_vm_ioctl_assign_device(st pci_reset_function(dev); match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_segnr = assigned_dev->segnr; match->host_busnr = assigned_dev->busnr; match->host_devfn = assigned_dev->devfn; match->flags = assigned_dev->flags; Index: 
linux-2.6/virt/kvm/iommu.c =================================================================== --- linux-2.6.orig/virt/kvm/iommu.c +++ linux-2.6/virt/kvm/iommu.c @@ -106,7 +106,8 @@ int kvm_assign_device(struct kvm *kvm, r = iommu_attach_device(domain, &pdev->dev); if (r) { - printk(KERN_ERR "assign device %x:%x.%x failed", + printk(KERN_ERR "assign device %x:%x:%x.%x failed", + pci_domain_nr(pdev->bus), pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); @@ -127,7 +128,8 @@ int kvm_assign_device(struct kvm *kvm, goto out_unmap; } - printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", + printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", + assigned_dev->host_segnr, assigned_dev->host_busnr, PCI_SLOT(assigned_dev->host_devfn), PCI_FUNC(assigned_dev->host_devfn)); @@ -154,7 +156,8 @@ int kvm_deassign_device(struct kvm *kvm, iommu_detach_device(domain, &pdev->dev); - printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", + printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", + assigned_dev->host_segnr, assigned_dev->host_busnr, PCI_SLOT(assigned_dev->host_devfn), PCI_FUNC(assigned_dev->host_devfn)); Index: linux-2.6/include/linux/kvm.h =================================================================== --- linux-2.6.orig/include/linux/kvm.h +++ linux-2.6/include/linux/kvm.h @@ -691,8 +691,9 @@ struct kvm_assigned_pci_dev { __u32 busnr; __u32 devfn; __u32 flags; + __u32 segnr; union { - __u32 reserved[12]; + __u32 reserved[11]; }; }; Index: linux-2.6/include/linux/kvm_host.h =================================================================== --- linux-2.6.orig/include/linux/kvm_host.h +++ linux-2.6/include/linux/kvm_host.h @@ -400,6 +400,7 @@ struct kvm_assigned_dev_kernel { struct work_struct interrupt_work; struct list_head list; int assigned_dev_id; + int host_segnr; int host_busnr; int host_devfn; unsigned int entries_nr; --------------060704070102030209030908 Content-Type: text/plain; name="kvm_vtd_multi_seg_qemu.patch" 
Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="kvm_vtd_multi_seg_qemu.patch" Index: qemu-kvm/hw/device-assignment.c =================================================================== --- qemu-kvm.orig/hw/device-assignment.c +++ qemu-kvm/hw/device-assignment.c @@ -595,8 +595,8 @@ static int get_real_device_id(const char return get_real_id(devpath, "device", val); } -static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus, - uint8_t r_dev, uint8_t r_func) +static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg, + uint8_t r_bus, uint8_t r_dev, uint8_t r_func) { char dir[128], name[128]; int fd, r = 0, v; @@ -609,8 +609,8 @@ static int get_real_device(AssignedDevic dev->region_number = 0; - snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/0000:%02x:%02x.%x/", - r_bus, r_dev, r_func); + snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/", + r_seg, r_bus, r_dev, r_func); snprintf(name, sizeof(name), "%sconfig", dir); @@ -752,9 +752,9 @@ static void free_assigned_device(Assigne } } -static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn) +static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) { - return (uint32_t)bus << 8 | (uint32_t)devfn; + return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn; } static void assign_failed_examine(AssignedDevice *dev) @@ -763,9 +763,8 @@ static void assign_failed_examine(Assign uint16_t vendor_id, device_id; int r; - /* XXX implement multidomain */ - sprintf(dir, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/", - dev->host.bus, dev->host.dev, dev->host.func); + sprintf(dir, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); sprintf(name, "%sdriver", dir); @@ -782,19 +781,19 @@ static void assign_failed_examine(Assign } fprintf(stderr, "*** The driver '%s' is occupying your device " - "%02x:%02x.%x.\n", - ns, dev->host.bus, dev->host.dev, dev->host.func); + 
"%04x:%02x:%02x.%x.\n", + ns, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); fprintf(stderr, "***\n"); fprintf(stderr, "*** You can try the following commands to free it:\n"); fprintf(stderr, "***\n"); fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/" "new_id\n", vendor_id, device_id); - fprintf(stderr, "*** $ echo \"0000:%02x:%02x.%x\" > /sys/bus/pci/drivers/" + fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/" "%s/unbind\n", - dev->host.bus, dev->host.dev, dev->host.func, ns); - fprintf(stderr, "*** $ echo \"0000:%02x:%02x.%x\" > /sys/bus/pci/drivers/" + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func, ns); + fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/" "pci-stub/bind\n", - dev->host.bus, dev->host.dev, dev->host.func); + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub" "/remove_id\n", vendor_id, device_id); fprintf(stderr, "***\n"); @@ -812,7 +811,8 @@ static int assign_device(AssignedDevice memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); assigned_dev_data.assigned_dev_id = - calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); + calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn); + assigned_dev_data.segnr = dev->h_segnr; assigned_dev_data.busnr = dev->h_busnr; assigned_dev_data.devfn = dev->h_devfn; @@ -867,7 +867,7 @@ static int assign_irq(AssignedDevice *de memset(&assigned_irq_data, 0, sizeof(assigned_irq_data)); assigned_irq_data.assigned_dev_id = - calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); + calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn); assigned_irq_data.guest_irq = irq; assigned_irq_data.host_irq = dev->real_device.irq; #ifdef KVM_CAP_ASSIGN_DEV_IRQ @@ -908,7 +908,7 @@ static void deassign_device(AssignedDevi memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); assigned_dev_data.assigned_dev_id = - 
calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); + calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn); r = kvm_deassign_pci_device(kvm_context, &assigned_dev_data); if (r < 0) @@ -964,7 +964,7 @@ static void assigned_dev_update_msi(PCID memset(&assigned_irq_data, 0, sizeof assigned_irq_data); assigned_irq_data.assigned_dev_id = - calc_assigned_dev_id(assigned_dev->h_busnr, + calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr, (uint8_t)assigned_dev->h_devfn); if (assigned_dev->irq_requested_type) { @@ -1048,7 +1048,7 @@ static int assigned_dev_update_msix_mmio fprintf(stderr, "MSI-X entry number is zero!\n"); return -EINVAL; } - msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_busnr, + msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr, (uint8_t)adev->h_devfn); msix_nr.entry_nr = entries_nr; r = kvm_assign_set_msix_nr(kvm_context, &msix_nr); @@ -1121,7 +1121,7 @@ static void assigned_dev_update_msix(PCI memset(&assigned_irq_data, 0, sizeof assigned_irq_data); assigned_irq_data.assigned_dev_id = - calc_assigned_dev_id(assigned_dev->h_busnr, + calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr, (uint8_t)assigned_dev->h_devfn); if (assigned_dev->irq_requested_type) { @@ -1317,12 +1317,13 @@ static int assigned_initfn(struct PCIDev uint8_t e_device, e_intx; int r; - if (!dev->host.bus && !dev->host.dev && !dev->host.func) { + if (!dev->host.seg && !dev->host.bus && !dev->host.dev && !dev->host.func) { qemu_error("pci-assign: error: no host device specified\n"); goto out; } - if (get_real_device(dev, dev->host.bus, dev->host.dev, dev->host.func)) { + if (get_real_device(dev, dev->host.seg, dev->host.bus, + dev->host.dev, dev->host.func)) { qemu_error("pci-assign: Error: Couldn't get real device (%s)!\n", dev->dev.qdev.id); goto out; @@ -1340,12 +1341,13 @@ static int assigned_initfn(struct PCIDev dev->intpin = e_intx; dev->run = 0; dev->girq = 0; + dev->h_segnr = dev->host.seg; dev->h_busnr = 
dev->host.bus; dev->h_devfn = PCI_DEVFN(dev->host.dev, dev->host.func); pacc = pci_alloc(); pci_init(pacc); - dev->pdev = pci_get_dev(pacc, 0, dev->host.bus, dev->host.dev, dev->host.func); + dev->pdev = pci_get_dev(pacc, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); if (pci_enable_capability_support(pci_dev, 0, NULL, assigned_device_pci_cap_write_config, @@ -1392,7 +1394,7 @@ static int parse_hostaddr(DeviceState *d PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop); int rc; - rc = pci_parse_host_devaddr(str, &ptr->bus, &ptr->dev, &ptr->func); + rc = pci_parse_host_devaddr(str, &ptr->seg, &ptr->bus, &ptr->dev, &ptr->func); if (rc != 0) return -1; return 0; @@ -1583,8 +1585,8 @@ static void assigned_dev_load_option_rom char rom_file[64]; snprintf(rom_file, sizeof(rom_file), - "/sys/bus/pci/devices/0000:%02x:%02x.%01x/rom", - dev->host.bus, dev->host.dev, dev->host.func); + "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom", + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); if (access(rom_file, F_OK)) return; Index: qemu-kvm/hw/device-assignment.h =================================================================== --- qemu-kvm.orig/hw/device-assignment.h +++ qemu-kvm/hw/device-assignment.h @@ -37,6 +37,7 @@ #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) typedef struct PCIHostDevice { + int seg; int bus; int dev; int func; @@ -82,6 +83,7 @@ typedef struct AssignedDevice { PCIDevRegions real_device; int run; int girq; + unsigned int h_segnr; unsigned char h_busnr; unsigned int h_devfn; int irq_requested_type; Index: qemu-kvm/hw/pci.c =================================================================== --- qemu-kvm.orig/hw/pci.c +++ qemu-kvm/hw/pci.c @@ -479,21 +479,48 @@ static int pci_parse_devaddr(const char } /* - * Parse device bdf in device assignment command: + * Parse device seg and bdf in device assignment command: * - * -pcidevice host=bus:dev.func + * -pcidevice host=[seg:]bus:dev.func * - * Parse <bus>:<slot>.<func> 
return -1 on error + * Parse [seg:]<bus>:<slot>.<func> return -1 on error */ -int pci_parse_host_devaddr(const char *addr, int *busp, +int pci_parse_host_devaddr(const char *addr, int *segp, int *busp, int *slotp, int *funcp) { const char *p; char *e; int val; - int bus = 0, slot = 0, func = 0; + int seg = 0, bus = 0, slot = 0, func = 0; + /* parse optional seg */ p = addr; + val = 0; + while (1) { + p = strchr(p, ':'); + if (p) { + val++; + p++; + } else + break; + } + if (val <= 0 || val > 2) + return -1; + + p = addr; + if (val == 2) { + val = strtoul(p, &e, 16); + if (e == p) + return -1; + if (*e == ':') { + seg = val; + p = e + 1; + } + } else + seg = 0; + + + /* parse bdf */ val = strtoul(p, &e, 16); if (e == p) return -1; @@ -515,12 +542,13 @@ int pci_parse_host_devaddr(const char *a } else return -1; - if (bus > 0xff || slot > 0x1f || func > 0x7) + if (seg > 0xffff || bus > 0xff || slot > 0x1f || func > 0x7) return -1; if (*e) return -1; + *segp = seg; *busp = bus; *slotp = slot; *funcp = func; Index: qemu-kvm/hw/pci.h =================================================================== --- qemu-kvm.orig/hw/pci.h +++ qemu-kvm/hw/pci.h @@ -275,7 +275,7 @@ PCIBus *pci_get_bus_devfn(int *devfnp, c int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp, unsigned *slotp); -int pci_parse_host_devaddr(const char *addr, int *busp, +int pci_parse_host_devaddr(const char *addr, int *segp, int *busp, int *slotp, int *funcp); void pci_info(Monitor *mon); Index: qemu-kvm/kvm/include/linux/kvm.h =================================================================== --- qemu-kvm.orig/kvm/include/linux/kvm.h +++ qemu-kvm/kvm/include/linux/kvm.h @@ -686,8 +686,9 @@ struct kvm_assigned_pci_dev { __u32 busnr; __u32 devfn; __u32 flags; + __u32 segnr; union { - __u32 reserved[12]; + __u32 reserved[11]; }; }; --------------060704070102030209030908--