* [PATCH] enable PCI multiple-segments for pass-through device
@ 2010-01-29 6:38 Zhai, Edwin
2010-01-30 0:08 ` Chris Wright
0 siblings, 1 reply; 7+ messages in thread
From: Zhai, Edwin @ 2010-01-29 6:38 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: kvm@vger.kernel.org, Zhai, Edwin
[-- Attachment #1: Type: text/plain, Size: 202 bytes --]
These 2 patches add an optional parameter (default 0), the PCI segment (or
domain), in addition to the BDF when assigning a PCI device to a guest.
Signed-off-by: Zhai Edwin <edwin.zhai@intel.com>
--
best rgds,
edwin
[-- Attachment #2: kvm_vtd_multi_seg_kmod.patch --]
[-- Type: text/plain, Size: 2804 bytes --]
Index: linux-2.6/virt/kvm/assigned-dev.c
===================================================================
--- linux-2.6.orig/virt/kvm/assigned-dev.c
+++ linux-2.6/virt/kvm/assigned-dev.c
@@ -526,7 +526,8 @@ static int kvm_vm_ioctl_assign_device(st
r = -ENOMEM;
goto out;
}
- dev = pci_get_bus_and_slot(assigned_dev->busnr,
+ dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
+ assigned_dev->busnr,
assigned_dev->devfn);
if (!dev) {
printk(KERN_INFO "%s: host device not found\n", __func__);
@@ -548,6 +549,7 @@ static int kvm_vm_ioctl_assign_device(st
pci_reset_function(dev);
match->assigned_dev_id = assigned_dev->assigned_dev_id;
+ match->host_segnr = assigned_dev->segnr;
match->host_busnr = assigned_dev->busnr;
match->host_devfn = assigned_dev->devfn;
match->flags = assigned_dev->flags;
Index: linux-2.6/virt/kvm/iommu.c
===================================================================
--- linux-2.6.orig/virt/kvm/iommu.c
+++ linux-2.6/virt/kvm/iommu.c
@@ -106,7 +106,8 @@ int kvm_assign_device(struct kvm *kvm,
r = iommu_attach_device(domain, &pdev->dev);
if (r) {
- printk(KERN_ERR "assign device %x:%x.%x failed",
+ printk(KERN_ERR "assign device %x:%x:%x.%x failed",
+ pci_domain_nr(pdev->bus),
pdev->bus->number,
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn));
@@ -127,7 +128,8 @@ int kvm_assign_device(struct kvm *kvm,
goto out_unmap;
}
- printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
+ printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
+ assigned_dev->host_segnr,
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
@@ -154,7 +156,8 @@ int kvm_deassign_device(struct kvm *kvm,
iommu_detach_device(domain, &pdev->dev);
- printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
+ printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
+ assigned_dev->host_segnr,
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
Index: linux-2.6/include/linux/kvm.h
===================================================================
--- linux-2.6.orig/include/linux/kvm.h
+++ linux-2.6/include/linux/kvm.h
@@ -691,8 +691,9 @@ struct kvm_assigned_pci_dev {
__u32 busnr;
__u32 devfn;
__u32 flags;
+ __u32 segnr;
union {
- __u32 reserved[12];
+ __u32 reserved[11];
};
};
Index: linux-2.6/include/linux/kvm_host.h
===================================================================
--- linux-2.6.orig/include/linux/kvm_host.h
+++ linux-2.6/include/linux/kvm_host.h
@@ -400,6 +400,7 @@ struct kvm_assigned_dev_kernel {
struct work_struct interrupt_work;
struct list_head list;
int assigned_dev_id;
+ int host_segnr;
int host_busnr;
int host_devfn;
unsigned int entries_nr;
[-- Attachment #3: kvm_vtd_multi_seg_qemu.patch --]
[-- Type: text/plain, Size: 11119 bytes --]
Index: qemu-kvm/hw/device-assignment.c
===================================================================
--- qemu-kvm.orig/hw/device-assignment.c
+++ qemu-kvm/hw/device-assignment.c
@@ -595,8 +595,8 @@ static int get_real_device_id(const char
return get_real_id(devpath, "device", val);
}
-static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
- uint8_t r_dev, uint8_t r_func)
+static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg,
+ uint8_t r_bus, uint8_t r_dev, uint8_t r_func)
{
char dir[128], name[128];
int fd, r = 0, v;
@@ -609,8 +609,8 @@ static int get_real_device(AssignedDevic
dev->region_number = 0;
- snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/0000:%02x:%02x.%x/",
- r_bus, r_dev, r_func);
+ snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/",
+ r_seg, r_bus, r_dev, r_func);
snprintf(name, sizeof(name), "%sconfig", dir);
@@ -752,9 +752,9 @@ static void free_assigned_device(Assigne
}
}
-static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
+static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
{
- return (uint32_t)bus << 8 | (uint32_t)devfn;
+ return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
}
static void assign_failed_examine(AssignedDevice *dev)
@@ -763,9 +763,8 @@ static void assign_failed_examine(Assign
uint16_t vendor_id, device_id;
int r;
- /* XXX implement multidomain */
- sprintf(dir, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/",
- dev->host.bus, dev->host.dev, dev->host.func);
+ sprintf(dir, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
+ dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
sprintf(name, "%sdriver", dir);
@@ -782,19 +781,19 @@ static void assign_failed_examine(Assign
}
fprintf(stderr, "*** The driver '%s' is occupying your device "
- "%02x:%02x.%x.\n",
- ns, dev->host.bus, dev->host.dev, dev->host.func);
+ "%04x:%02x:%02x.%x.\n",
+ ns, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
fprintf(stderr, "***\n");
fprintf(stderr, "*** You can try the following commands to free it:\n");
fprintf(stderr, "***\n");
fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/"
"new_id\n", vendor_id, device_id);
- fprintf(stderr, "*** $ echo \"0000:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
+ fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
"%s/unbind\n",
- dev->host.bus, dev->host.dev, dev->host.func, ns);
- fprintf(stderr, "*** $ echo \"0000:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
+ dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func, ns);
+ fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
"pci-stub/bind\n",
- dev->host.bus, dev->host.dev, dev->host.func);
+ dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub"
"/remove_id\n", vendor_id, device_id);
fprintf(stderr, "***\n");
@@ -812,7 +811,8 @@ static int assign_device(AssignedDevice
memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
assigned_dev_data.assigned_dev_id =
- calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+ calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
+ assigned_dev_data.segnr = dev->h_segnr;
assigned_dev_data.busnr = dev->h_busnr;
assigned_dev_data.devfn = dev->h_devfn;
@@ -867,7 +867,7 @@ static int assign_irq(AssignedDevice *de
memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
assigned_irq_data.assigned_dev_id =
- calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+ calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
assigned_irq_data.guest_irq = irq;
assigned_irq_data.host_irq = dev->real_device.irq;
#ifdef KVM_CAP_ASSIGN_DEV_IRQ
@@ -908,7 +908,7 @@ static void deassign_device(AssignedDevi
memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
assigned_dev_data.assigned_dev_id =
- calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+ calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
r = kvm_deassign_pci_device(kvm_context, &assigned_dev_data);
if (r < 0)
@@ -964,7 +964,7 @@ static void assigned_dev_update_msi(PCID
memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
assigned_irq_data.assigned_dev_id =
- calc_assigned_dev_id(assigned_dev->h_busnr,
+ calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
(uint8_t)assigned_dev->h_devfn);
if (assigned_dev->irq_requested_type) {
@@ -1048,7 +1048,7 @@ static int assigned_dev_update_msix_mmio
fprintf(stderr, "MSI-X entry number is zero!\n");
return -EINVAL;
}
- msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_busnr,
+ msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
(uint8_t)adev->h_devfn);
msix_nr.entry_nr = entries_nr;
r = kvm_assign_set_msix_nr(kvm_context, &msix_nr);
@@ -1121,7 +1121,7 @@ static void assigned_dev_update_msix(PCI
memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
assigned_irq_data.assigned_dev_id =
- calc_assigned_dev_id(assigned_dev->h_busnr,
+ calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
(uint8_t)assigned_dev->h_devfn);
if (assigned_dev->irq_requested_type) {
@@ -1317,12 +1317,13 @@ static int assigned_initfn(struct PCIDev
uint8_t e_device, e_intx;
int r;
- if (!dev->host.bus && !dev->host.dev && !dev->host.func) {
+ if (!dev->host.seg && !dev->host.bus && !dev->host.dev && !dev->host.func) {
qemu_error("pci-assign: error: no host device specified\n");
goto out;
}
- if (get_real_device(dev, dev->host.bus, dev->host.dev, dev->host.func)) {
+ if (get_real_device(dev, dev->host.seg, dev->host.bus,
+ dev->host.dev, dev->host.func)) {
qemu_error("pci-assign: Error: Couldn't get real device (%s)!\n",
dev->dev.qdev.id);
goto out;
@@ -1340,12 +1341,13 @@ static int assigned_initfn(struct PCIDev
dev->intpin = e_intx;
dev->run = 0;
dev->girq = 0;
+ dev->h_segnr = dev->host.seg;
dev->h_busnr = dev->host.bus;
dev->h_devfn = PCI_DEVFN(dev->host.dev, dev->host.func);
pacc = pci_alloc();
pci_init(pacc);
- dev->pdev = pci_get_dev(pacc, 0, dev->host.bus, dev->host.dev, dev->host.func);
+ dev->pdev = pci_get_dev(pacc, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
if (pci_enable_capability_support(pci_dev, 0, NULL,
assigned_device_pci_cap_write_config,
@@ -1392,7 +1394,7 @@ static int parse_hostaddr(DeviceState *d
PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop);
int rc;
- rc = pci_parse_host_devaddr(str, &ptr->bus, &ptr->dev, &ptr->func);
+ rc = pci_parse_host_devaddr(str, &ptr->seg, &ptr->bus, &ptr->dev, &ptr->func);
if (rc != 0)
return -1;
return 0;
@@ -1583,8 +1585,8 @@ static void assigned_dev_load_option_rom
char rom_file[64];
snprintf(rom_file, sizeof(rom_file),
- "/sys/bus/pci/devices/0000:%02x:%02x.%01x/rom",
- dev->host.bus, dev->host.dev, dev->host.func);
+ "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom",
+ dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
if (access(rom_file, F_OK))
return;
Index: qemu-kvm/hw/device-assignment.h
===================================================================
--- qemu-kvm.orig/hw/device-assignment.h
+++ qemu-kvm/hw/device-assignment.h
@@ -37,6 +37,7 @@
#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
typedef struct PCIHostDevice {
+ int seg;
int bus;
int dev;
int func;
@@ -82,6 +83,7 @@ typedef struct AssignedDevice {
PCIDevRegions real_device;
int run;
int girq;
+ unsigned int h_segnr;
unsigned char h_busnr;
unsigned int h_devfn;
int irq_requested_type;
Index: qemu-kvm/hw/pci.c
===================================================================
--- qemu-kvm.orig/hw/pci.c
+++ qemu-kvm/hw/pci.c
@@ -479,21 +479,48 @@ static int pci_parse_devaddr(const char
}
/*
- * Parse device bdf in device assignment command:
+ * Parse device seg and bdf in device assignment command:
*
- * -pcidevice host=bus:dev.func
+ * -pcidevice host=[seg:]bus:dev.func
*
- * Parse <bus>:<slot>.<func> return -1 on error
+ * Parse [seg:]<bus>:<slot>.<func> return -1 on error
*/
-int pci_parse_host_devaddr(const char *addr, int *busp,
+int pci_parse_host_devaddr(const char *addr, int *segp, int *busp,
int *slotp, int *funcp)
{
const char *p;
char *e;
int val;
- int bus = 0, slot = 0, func = 0;
+ int seg = 0, bus = 0, slot = 0, func = 0;
+ /* parse optional seg */
p = addr;
+ val = 0;
+ while (1) {
+ p = strchr(p, ':');
+ if (p) {
+ val++;
+ p++;
+ } else
+ break;
+ }
+ if (val <= 0 || val > 2)
+ return -1;
+
+ p = addr;
+ if (val == 2) {
+ val = strtoul(p, &e, 16);
+ if (e == p)
+ return -1;
+ if (*e == ':') {
+ seg = val;
+ p = e + 1;
+ }
+ } else
+ seg = 0;
+
+
+ /* parse bdf */
val = strtoul(p, &e, 16);
if (e == p)
return -1;
@@ -515,12 +542,13 @@ int pci_parse_host_devaddr(const char *a
} else
return -1;
- if (bus > 0xff || slot > 0x1f || func > 0x7)
+ if (seg > 0xffff || bus > 0xff || slot > 0x1f || func > 0x7)
return -1;
if (*e)
return -1;
+ *segp = seg;
*busp = bus;
*slotp = slot;
*funcp = func;
Index: qemu-kvm/hw/pci.h
===================================================================
--- qemu-kvm.orig/hw/pci.h
+++ qemu-kvm/hw/pci.h
@@ -275,7 +275,7 @@ PCIBus *pci_get_bus_devfn(int *devfnp, c
int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp,
unsigned *slotp);
-int pci_parse_host_devaddr(const char *addr, int *busp,
+int pci_parse_host_devaddr(const char *addr, int *segp, int *busp,
int *slotp, int *funcp);
void pci_info(Monitor *mon);
Index: qemu-kvm/kvm/include/linux/kvm.h
===================================================================
--- qemu-kvm.orig/kvm/include/linux/kvm.h
+++ qemu-kvm/kvm/include/linux/kvm.h
@@ -686,8 +686,9 @@ struct kvm_assigned_pci_dev {
__u32 busnr;
__u32 devfn;
__u32 flags;
+ __u32 segnr;
union {
- __u32 reserved[12];
+ __u32 reserved[11];
};
};
^ permalink raw reply [flat|nested] 7+ messages in thread* Re: [PATCH] enable PCI multiple-segments for pass-through device 2010-01-29 6:38 [PATCH] enable PCI multiple-segments for pass-through device Zhai, Edwin @ 2010-01-30 0:08 ` Chris Wright 2010-02-01 9:22 ` Zhai, Edwin 0 siblings, 1 reply; 7+ messages in thread From: Chris Wright @ 2010-01-30 0:08 UTC (permalink / raw) To: Zhai, Edwin; +Cc: Marcelo Tosatti, kvm@vger.kernel.org * Zhai, Edwin (edwin.zhai@intel.com) wrote: > These 2 patches enable optional parameter(default 0) - PCI segment(or > domain) besides BDF, when assigning PCI device to guest. > > Signed-off-by: Zhai Edwin <edwin.zhai@intel.com> Looks good. For compatibility, an old userspace will zero the padding and keep the default 0 segment when running w/ a new kernel, a new userspace won't be able to pass a non-zero segment value to an old kernel. If that latter bit matters, need a capability to express this change. Looks like qemu-options.hx needs an update too. Otherwise... Acked-by: Chris Wright <chrisw@sous-sol.org> ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] enable PCI multiple-segments for pass-through device 2010-01-30 0:08 ` Chris Wright @ 2010-02-01 9:22 ` Zhai, Edwin 2010-02-01 19:20 ` Marcelo Tosatti 0 siblings, 1 reply; 7+ messages in thread From: Zhai, Edwin @ 2010-02-01 9:22 UTC (permalink / raw) To: Chris Wright; +Cc: Marcelo Tosatti, kvm@vger.kernel.org [-- Attachment #1: Type: text/plain, Size: 2849 bytes --] Wright, Thanks for your comments. I have updated the qemu-options.hx and used following new CAP. Updated version is also attached. Index: qemu-kvm/hw/device-assignment.c =================================================================== --- qemu-kvm.orig/hw/device-assignment.c +++ qemu-kvm/hw/device-assignment.c @@ -809,6 +809,16 @@ static int assign_device(AssignedDevice struct kvm_assigned_pci_dev assigned_dev_data; int r; +#ifdef KVM_CAP_PCI_SEGMENT + /* Only pass non-zero PCI segment to capable module */ + if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) && + dev->h_segnr) { + fprintf(stderr, "Can't assign device inside non-zero PCI segment " + "as this KVM module doesn't support it.\n"); + return -ENODEV; + } +#endif + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); assigned_dev_data.assigned_dev_id = calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn); Index: qemu-kvm/kvm/include/linux/kvm.h =================================================================== --- qemu-kvm.orig/kvm/include/linux/kvm.h +++ qemu-kvm/kvm/include/linux/kvm.h @@ -498,6 +498,8 @@ struct kvm_ioeventfd { #define KVM_CAP_S390_PSW 42 #define KVM_CAP_PPC_SEGSTATE 43 +#define KVM_CAP_PCI_SEGMENT 47 + #ifdef KVM_CAP_IRQ_ROUTING struct kvm_irq_routing_irqchip { Index: linux-2.6/include/linux/kvm.h =================================================================== --- linux-2.6.orig/include/linux/kvm.h +++ linux-2.6/include/linux/kvm.h @@ -500,6 +500,7 @@ struct kvm_ioeventfd { #define KVM_CAP_HYPERV 44 #define KVM_CAP_HYPERV_VAPIC 45 #define KVM_CAP_HYPERV_SPIN 46 +#define 
KVM_CAP_PCI_SEGMENT 47 #ifdef KVM_CAP_IRQ_ROUTING Index: linux-2.6/arch/x86/kvm/x86.c =================================================================== --- linux-2.6.orig/arch/x86/kvm/x86.c +++ linux-2.6/arch/x86/kvm/x86.c @@ -1569,6 +1569,7 @@ int kvm_dev_ioctl_check_extension(long e case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: + case KVM_CAP_PCI_SEGMENT: r = 1; break; case KVM_CAP_COALESCED_MMIO: Chris Wright wrote: > * Zhai, Edwin (edwin.zhai@intel.com) wrote: > > These 2 patches enable optional parameter(default 0) - PCI segment(or > > domain) besides BDF, when assigning PCI device to guest. > > > > Signed-off-by: Zhai Edwin <edwin.zhai@intel.com> > > Looks good. For compatibility, an old userspace will zero the padding > and keep the default 0 segment when running w/ a new kernel, a new > userspace won't be able to pass a non-zero segment value to an old > kernel. If that latter bit matters, need a capability to express this > change. Looks like qemu-options.hx needs an update too. Otherwise... 
> > Acked-by: Chris Wright <chrisw@sous-sol.org> > [-- Attachment #2: kvm_vtd_multi_seg_qemu_v2.patch --] [-- Type: application/octet-stream, Size: 12546 bytes --] Index: qemu-kvm/hw/device-assignment.c =================================================================== --- qemu-kvm.orig/hw/device-assignment.c +++ qemu-kvm/hw/device-assignment.c @@ -595,8 +595,8 @@ static int get_real_device_id(const char return get_real_id(devpath, "device", val); } -static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus, - uint8_t r_dev, uint8_t r_func) +static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg, + uint8_t r_bus, uint8_t r_dev, uint8_t r_func) { char dir[128], name[128]; int fd, r = 0, v; @@ -609,8 +609,8 @@ static int get_real_device(AssignedDevic dev->region_number = 0; - snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/0000:%02x:%02x.%x/", - r_bus, r_dev, r_func); + snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/", + r_seg, r_bus, r_dev, r_func); snprintf(name, sizeof(name), "%sconfig", dir); @@ -752,9 +752,9 @@ static void free_assigned_device(Assigne } } -static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn) +static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) { - return (uint32_t)bus << 8 | (uint32_t)devfn; + return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn; } static void assign_failed_examine(AssignedDevice *dev) @@ -763,9 +763,8 @@ static void assign_failed_examine(Assign uint16_t vendor_id, device_id; int r; - /* XXX implement multidomain */ - sprintf(dir, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/", - dev->host.bus, dev->host.dev, dev->host.func); + sprintf(dir, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); sprintf(name, "%sdriver", dir); @@ -782,19 +781,19 @@ static void assign_failed_examine(Assign } fprintf(stderr, "*** The driver '%s' is occupying your device " - "%02x:%02x.%x.\n", - ns, 
dev->host.bus, dev->host.dev, dev->host.func); + "%04x:%02x:%02x.%x.\n", + ns, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); fprintf(stderr, "***\n"); fprintf(stderr, "*** You can try the following commands to free it:\n"); fprintf(stderr, "***\n"); fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/" "new_id\n", vendor_id, device_id); - fprintf(stderr, "*** $ echo \"0000:%02x:%02x.%x\" > /sys/bus/pci/drivers/" + fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/" "%s/unbind\n", - dev->host.bus, dev->host.dev, dev->host.func, ns); - fprintf(stderr, "*** $ echo \"0000:%02x:%02x.%x\" > /sys/bus/pci/drivers/" + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func, ns); + fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/" "pci-stub/bind\n", - dev->host.bus, dev->host.dev, dev->host.func); + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub" "/remove_id\n", vendor_id, device_id); fprintf(stderr, "***\n"); @@ -810,9 +809,20 @@ static int assign_device(AssignedDevice struct kvm_assigned_pci_dev assigned_dev_data; int r; +#ifdef KVM_CAP_PCI_SEGMENT + /* Only pass non-zero PCI segment to capable module */ + if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) && + dev->h_segnr) { + fprintf(stderr, "Can't assign device inside non-zero PCI segment " + "as this KVM module doesn't support it.\n"); + return -ENODEV; + } +#endif + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); assigned_dev_data.assigned_dev_id = - calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); + calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn); + assigned_dev_data.segnr = dev->h_segnr; assigned_dev_data.busnr = dev->h_busnr; assigned_dev_data.devfn = dev->h_devfn; @@ -867,7 +877,7 @@ static int assign_irq(AssignedDevice *de memset(&assigned_irq_data, 0, sizeof(assigned_irq_data)); 
assigned_irq_data.assigned_dev_id = - calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); + calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn); assigned_irq_data.guest_irq = irq; assigned_irq_data.host_irq = dev->real_device.irq; #ifdef KVM_CAP_ASSIGN_DEV_IRQ @@ -908,7 +918,7 @@ static void deassign_device(AssignedDevi memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); assigned_dev_data.assigned_dev_id = - calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); + calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn); r = kvm_deassign_pci_device(kvm_context, &assigned_dev_data); if (r < 0) @@ -964,7 +974,7 @@ static void assigned_dev_update_msi(PCID memset(&assigned_irq_data, 0, sizeof assigned_irq_data); assigned_irq_data.assigned_dev_id = - calc_assigned_dev_id(assigned_dev->h_busnr, + calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr, (uint8_t)assigned_dev->h_devfn); if (assigned_dev->irq_requested_type) { @@ -1048,7 +1058,7 @@ static int assigned_dev_update_msix_mmio fprintf(stderr, "MSI-X entry number is zero!\n"); return -EINVAL; } - msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_busnr, + msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr, (uint8_t)adev->h_devfn); msix_nr.entry_nr = entries_nr; r = kvm_assign_set_msix_nr(kvm_context, &msix_nr); @@ -1121,7 +1131,7 @@ static void assigned_dev_update_msix(PCI memset(&assigned_irq_data, 0, sizeof assigned_irq_data); assigned_irq_data.assigned_dev_id = - calc_assigned_dev_id(assigned_dev->h_busnr, + calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr, (uint8_t)assigned_dev->h_devfn); if (assigned_dev->irq_requested_type) { @@ -1317,12 +1327,13 @@ static int assigned_initfn(struct PCIDev uint8_t e_device, e_intx; int r; - if (!dev->host.bus && !dev->host.dev && !dev->host.func) { + if (!dev->host.seg && !dev->host.bus && !dev->host.dev && !dev->host.func) { qemu_error("pci-assign: error: no host device specified\n"); goto out; } - if 
(get_real_device(dev, dev->host.bus, dev->host.dev, dev->host.func)) { + if (get_real_device(dev, dev->host.seg, dev->host.bus, + dev->host.dev, dev->host.func)) { qemu_error("pci-assign: Error: Couldn't get real device (%s)!\n", dev->dev.qdev.id); goto out; @@ -1340,12 +1351,13 @@ static int assigned_initfn(struct PCIDev dev->intpin = e_intx; dev->run = 0; dev->girq = 0; + dev->h_segnr = dev->host.seg; dev->h_busnr = dev->host.bus; dev->h_devfn = PCI_DEVFN(dev->host.dev, dev->host.func); pacc = pci_alloc(); pci_init(pacc); - dev->pdev = pci_get_dev(pacc, 0, dev->host.bus, dev->host.dev, dev->host.func); + dev->pdev = pci_get_dev(pacc, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); if (pci_enable_capability_support(pci_dev, 0, NULL, assigned_device_pci_cap_write_config, @@ -1392,7 +1404,7 @@ static int parse_hostaddr(DeviceState *d PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop); int rc; - rc = pci_parse_host_devaddr(str, &ptr->bus, &ptr->dev, &ptr->func); + rc = pci_parse_host_devaddr(str, &ptr->seg, &ptr->bus, &ptr->dev, &ptr->func); if (rc != 0) return -1; return 0; @@ -1512,8 +1524,8 @@ static void assigned_dev_load_option_rom char rom_file[64]; snprintf(rom_file, sizeof(rom_file), - "/sys/bus/pci/devices/0000:%02x:%02x.%01x/rom", - dev->host.bus, dev->host.dev, dev->host.func); + "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom", + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); if (access(rom_file, F_OK)) return; Index: qemu-kvm/hw/device-assignment.h =================================================================== --- qemu-kvm.orig/hw/device-assignment.h +++ qemu-kvm/hw/device-assignment.h @@ -37,6 +37,7 @@ #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) typedef struct PCIHostDevice { + int seg; int bus; int dev; int func; @@ -82,6 +83,7 @@ typedef struct AssignedDevice { PCIDevRegions real_device; int run; int girq; + unsigned int h_segnr; unsigned char h_busnr; unsigned int h_devfn; int 
irq_requested_type; Index: qemu-kvm/hw/pci.c =================================================================== --- qemu-kvm.orig/hw/pci.c +++ qemu-kvm/hw/pci.c @@ -480,21 +480,48 @@ static int pci_parse_devaddr(const char } /* - * Parse device bdf in device assignment command: + * Parse device seg and bdf in device assignment command: * - * -pcidevice host=bus:dev.func + * -pcidevice host=[seg:]bus:dev.func * - * Parse <bus>:<slot>.<func> return -1 on error + * Parse [seg:]<bus>:<slot>.<func> return -1 on error */ -int pci_parse_host_devaddr(const char *addr, int *busp, +int pci_parse_host_devaddr(const char *addr, int *segp, int *busp, int *slotp, int *funcp) { const char *p; char *e; int val; - int bus = 0, slot = 0, func = 0; + int seg = 0, bus = 0, slot = 0, func = 0; + /* parse optional seg */ p = addr; + val = 0; + while (1) { + p = strchr(p, ':'); + if (p) { + val++; + p++; + } else + break; + } + if (val <= 0 || val > 2) + return -1; + + p = addr; + if (val == 2) { + val = strtoul(p, &e, 16); + if (e == p) + return -1; + if (*e == ':') { + seg = val; + p = e + 1; + } + } else + seg = 0; + + + /* parse bdf */ val = strtoul(p, &e, 16); if (e == p) return -1; @@ -516,12 +543,13 @@ int pci_parse_host_devaddr(const char *a } else return -1; - if (bus > 0xff || slot > 0x1f || func > 0x7) + if (seg > 0xffff || bus > 0xff || slot > 0x1f || func > 0x7) return -1; if (*e) return -1; + *segp = seg; *busp = bus; *slotp = slot; *funcp = func; Index: qemu-kvm/hw/pci.h =================================================================== --- qemu-kvm.orig/hw/pci.h +++ qemu-kvm/hw/pci.h @@ -276,7 +276,7 @@ PCIBus *pci_get_bus_devfn(int *devfnp, c int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp, unsigned *slotp); -int pci_parse_host_devaddr(const char *addr, int *busp, +int pci_parse_host_devaddr(const char *addr, int *segp, int *busp, int *slotp, int *funcp); void pci_info(Monitor *mon); Index: qemu-kvm/kvm/include/linux/kvm.h 
=================================================================== --- qemu-kvm.orig/kvm/include/linux/kvm.h +++ qemu-kvm/kvm/include/linux/kvm.h @@ -498,6 +498,8 @@ struct kvm_ioeventfd { #define KVM_CAP_S390_PSW 42 #define KVM_CAP_PPC_SEGSTATE 43 +#define KVM_CAP_PCI_SEGMENT 47 + #ifdef KVM_CAP_IRQ_ROUTING struct kvm_irq_routing_irqchip { @@ -691,8 +693,9 @@ struct kvm_assigned_pci_dev { __u32 busnr; __u32 devfn; __u32 flags; + __u32 segnr; union { - __u32 reserved[12]; + __u32 reserved[11]; }; }; Index: qemu-kvm/qemu-options.hx =================================================================== --- qemu-kvm.orig/qemu-options.hx +++ qemu-kvm/qemu-options.hx @@ -1977,7 +1977,7 @@ DEF("no-kvm-pit-reinjection", 0, QEMU_OP " disable KVM kernel mode PIT interrupt reinjection\n") #if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(TARGET_IA64) || defined(__linux__) DEF("pcidevice", HAS_ARG, QEMU_OPTION_pcidevice, - "-pcidevice host=bus:dev.func[,dma=none][,name=string]\n" + "-pcidevice host=[seg:]bus:dev.func[,dma=none][,name=string]\n" " expose a PCI device to the guest OS\n" " dma=none: don't perform any dma translations (default is to use an iommu)\n" " 'string' is used in log output\n") [-- Attachment #3: kvm_vtd_multi_seg_kmod_v2.patch --] [-- Type: application/octet-stream, Size: 3400 bytes --] Index: linux-2.6/virt/kvm/assigned-dev.c =================================================================== --- linux-2.6.orig/virt/kvm/assigned-dev.c +++ linux-2.6/virt/kvm/assigned-dev.c @@ -526,7 +526,8 @@ static int kvm_vm_ioctl_assign_device(st r = -ENOMEM; goto out; } - dev = pci_get_bus_and_slot(assigned_dev->busnr, + dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, + assigned_dev->busnr, assigned_dev->devfn); if (!dev) { printk(KERN_INFO "%s: host device not found\n", __func__); @@ -548,6 +549,7 @@ static int kvm_vm_ioctl_assign_device(st pci_reset_function(dev); match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_segnr = 
assigned_dev->segnr; match->host_busnr = assigned_dev->busnr; match->host_devfn = assigned_dev->devfn; match->flags = assigned_dev->flags; Index: linux-2.6/virt/kvm/iommu.c =================================================================== --- linux-2.6.orig/virt/kvm/iommu.c +++ linux-2.6/virt/kvm/iommu.c @@ -106,7 +106,8 @@ int kvm_assign_device(struct kvm *kvm, r = iommu_attach_device(domain, &pdev->dev); if (r) { - printk(KERN_ERR "assign device %x:%x.%x failed", + printk(KERN_ERR "assign device %x:%x:%x.%x failed", + pci_domain_nr(pdev->bus), pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); @@ -127,7 +128,8 @@ int kvm_assign_device(struct kvm *kvm, goto out_unmap; } - printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", + printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", + assigned_dev->host_segnr, assigned_dev->host_busnr, PCI_SLOT(assigned_dev->host_devfn), PCI_FUNC(assigned_dev->host_devfn)); @@ -154,7 +156,8 @@ int kvm_deassign_device(struct kvm *kvm, iommu_detach_device(domain, &pdev->dev); - printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", + printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", + assigned_dev->host_segnr, assigned_dev->host_busnr, PCI_SLOT(assigned_dev->host_devfn), PCI_FUNC(assigned_dev->host_devfn)); Index: linux-2.6/include/linux/kvm.h =================================================================== --- linux-2.6.orig/include/linux/kvm.h +++ linux-2.6/include/linux/kvm.h @@ -500,6 +500,7 @@ struct kvm_ioeventfd { #define KVM_CAP_HYPERV 44 #define KVM_CAP_HYPERV_VAPIC 45 #define KVM_CAP_HYPERV_SPIN 46 +#define KVM_CAP_PCI_SEGMENT 47 #ifdef KVM_CAP_IRQ_ROUTING @@ -694,8 +695,9 @@ struct kvm_assigned_pci_dev { __u32 busnr; __u32 devfn; __u32 flags; + __u32 segnr; union { - __u32 reserved[12]; + __u32 reserved[11]; }; }; Index: linux-2.6/include/linux/kvm_host.h =================================================================== --- linux-2.6.orig/include/linux/kvm_host.h +++ 
linux-2.6/include/linux/kvm_host.h @@ -400,6 +400,7 @@ struct kvm_assigned_dev_kernel { struct work_struct interrupt_work; struct list_head list; int assigned_dev_id; + int host_segnr; int host_busnr; int host_devfn; unsigned int entries_nr; Index: linux-2.6/arch/x86/kvm/x86.c =================================================================== --- linux-2.6.orig/arch/x86/kvm/x86.c +++ linux-2.6/arch/x86/kvm/x86.c @@ -1569,6 +1569,7 @@ int kvm_dev_ioctl_check_extension(long e case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: + case KVM_CAP_PCI_SEGMENT: r = 1; break; case KVM_CAP_COALESCED_MMIO: ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] enable PCI multiple-segments for pass-through device 2010-02-01 9:22 ` Zhai, Edwin @ 2010-02-01 19:20 ` Marcelo Tosatti 2010-02-01 23:27 ` Jan Kiszka 0 siblings, 1 reply; 7+ messages in thread From: Marcelo Tosatti @ 2010-02-01 19:20 UTC (permalink / raw) To: Zhai, Edwin; +Cc: Chris Wright, kvm@vger.kernel.org On Mon, Feb 01, 2010 at 05:22:47PM +0800, Zhai, Edwin wrote: > Wright, > Thanks for your comments. I have updated the qemu-options.hx and used > following new CAP. Updated version is also attached. Applied, thanks. Please send one patch per email, with patch inlined, in the future. ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] enable PCI multiple-segments for pass-through device 2010-02-01 19:20 ` Marcelo Tosatti @ 2010-02-01 23:27 ` Jan Kiszka 2010-02-02 0:24 ` Zhai, Edwin 0 siblings, 1 reply; 7+ messages in thread From: Jan Kiszka @ 2010-02-01 23:27 UTC (permalink / raw) To: Marcelo Tosatti; +Cc: Zhai, Edwin, Chris Wright, kvm@vger.kernel.org [-- Attachment #1: Type: text/plain, Size: 413 bytes --] Marcelo Tosatti wrote: > On Mon, Feb 01, 2010 at 05:22:47PM +0800, Zhai, Edwin wrote: >> Wright, >> Thanks for your comments. I have updated the qemu-options.hx and used >> following new CAP. Updated version is also attached. > > Applied, thanks. > > Please send one patch per email, with patch inlined, in the future. > This commit doesn't build without KVM_CAP_PCI_SEGMENT, please fix. Jan [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 257 bytes --] ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] enable PCI multiple-segments for pass-through device 2010-02-01 23:27 ` Jan Kiszka @ 2010-02-02 0:24 ` Zhai, Edwin 2010-02-02 0:31 ` Chris Wright 0 siblings, 1 reply; 7+ messages in thread From: Zhai, Edwin @ 2010-02-02 0:24 UTC (permalink / raw) To: Jan Kiszka; +Cc: Marcelo Tosatti, Chris Wright, kvm@vger.kernel.org Jan Kiszka wrote: > Marcelo Tosatti wrote: > >> >> Applied, thanks. >> >> Please send one patch per email, with patch inlined, in the future. >> >> > > This commit doesn't build without KVM_CAP_PCI_SEGMENT, please fix. > Jan, What does it mean? Can we update all files except the "kvm.h"? Thanks, > Jan > > -- best rgds, edwin ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] enable PCI multiple-segments for pass-through device 2010-02-02 0:24 ` Zhai, Edwin @ 2010-02-02 0:31 ` Chris Wright 0 siblings, 0 replies; 7+ messages in thread From: Chris Wright @ 2010-02-02 0:31 UTC (permalink / raw) To: Zhai, Edwin Cc: Jan Kiszka, Marcelo Tosatti, Chris Wright, kvm@vger.kernel.org * Zhai, Edwin (edwin.zhai@intel.com) wrote: > Jan Kiszka wrote: >> Marcelo Tosatti wrote: >> >>> Applied, thanks. >>> >>> Please send one patch per email, with patch inlined, in the future. >>> >> >> This commit doesn't build without KVM_CAP_PCI_SEGMENT, please fix. > > What does it mean? Can we update all files except the "kvm.h"? I think Marcelo already fixed... ed880109 Fix device-assignment.c compilation without KVM_CAP_PCI_SEGMENT ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2010-02-02 0:31 UTC | newest] Thread overview: 7+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2010-01-29 6:38 [PATCH] enable PCI multiple-segments for pass-through device Zhai, Edwin 2010-01-30 0:08 ` Chris Wright 2010-02-01 9:22 ` Zhai, Edwin 2010-02-01 19:20 ` Marcelo Tosatti 2010-02-01 23:27 ` Jan Kiszka 2010-02-02 0:24 ` Zhai, Edwin 2010-02-02 0:31 ` Chris Wright
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox