* [PATCH 1/3] Enable non page boundary BAR device assignment
2009-12-17 15:04 [PATCH 0/3] Device Assignment fixes Alexander Graf
@ 2009-12-17 15:04 ` Alexander Graf
2009-12-17 15:04 ` [PATCH 2/3] Split off sysfs id retrieval Alexander Graf
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: Alexander Graf @ 2009-12-17 15:04 UTC (permalink / raw)
To: kvm list; +Cc: Gleb Natapov, Michael S. Tsirkin, Muli Ben-Yehuda, Chris Wright
While trying to get device passthrough working with an emulex hba, kvm
refused to pass it through because it has a BAR of 256 bytes:
Region 0: Memory at d2100000 (64-bit, non-prefetchable) [size=4K]
Region 2: Memory at d2101000 (64-bit, non-prefetchable) [size=256]
Region 4: I/O ports at b100 [size=256]
Since the page boundary is an arbitrary optimization to allow 1:1 mapping of
physical to virtual addresses, we can still take the old MMIO callback route.
So let's add a second code path that allows for size & 0xFFF != 0 sized regions
by looping it through userspace.
I verified that it works by passing through an e1000 with this additional patch
applied and the card acted the same way it did without this patch:
map_func = assigned_dev_iomem_map;
- if (cur_region->size & 0xFFF) {
+ if (i != PCI_ROM_SLOT){
fprintf(stderr, "PCI region %d at address 0x%llx "
Signed-off-by: Alexander Graf <agraf@suse.de>
---
v1 -> v2:
- don't use map_func function pointer
- use the same code for mmap on fast and slow path
v2 -> v3:
- address mst's comments
---
hw/device-assignment.c | 117 +++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 112 insertions(+), 5 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 13a86bb..000fa61 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -148,6 +148,105 @@ static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
return value;
}
+static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
+{
+ AssignedDevRegion *d = opaque;
+ uint8_t *in = d->u.r_virtbase + addr;
+ uint32_t r;
+
+ r = *in;
+ DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
+
+ return r;
+}
+
+static uint32_t slow_bar_readw(void *opaque, target_phys_addr_t addr)
+{
+ AssignedDevRegion *d = opaque;
+ uint16_t *in = d->u.r_virtbase + addr;
+ uint32_t r;
+
+ r = *in;
+ DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
+
+ return r;
+}
+
+static uint32_t slow_bar_readl(void *opaque, target_phys_addr_t addr)
+{
+ AssignedDevRegion *d = opaque;
+ uint32_t *in = d->u.r_virtbase + addr;
+ uint32_t r;
+
+ r = *in;
+ DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
+
+ return r;
+}
+
+static void slow_bar_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ AssignedDevRegion *d = opaque;
+ uint8_t *out = d->u.r_virtbase + addr;
+
+ DEBUG("slow_bar_writeb addr=0x" TARGET_FMT_plx " val=0x%02x\n", addr, val);
+ *out = val;
+}
+
+static void slow_bar_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ AssignedDevRegion *d = opaque;
+ uint16_t *out = d->u.r_virtbase + addr;
+
+ DEBUG("slow_bar_writew addr=0x" TARGET_FMT_plx " val=0x%04x\n", addr, val);
+ *out = val;
+}
+
+static void slow_bar_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ AssignedDevRegion *d = opaque;
+ uint32_t *out = d->u.r_virtbase + addr;
+
+ DEBUG("slow_bar_writel addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, val);
+ *out = val;
+}
+
+static CPUWriteMemoryFunc * const slow_bar_write[] = {
+ &slow_bar_writeb,
+ &slow_bar_writew,
+ &slow_bar_writel
+};
+
+static CPUReadMemoryFunc * const slow_bar_read[] = {
+ &slow_bar_readb,
+ &slow_bar_readw,
+ &slow_bar_readl
+};
+
+static void assigned_dev_iomem_map_slow(PCIDevice *pci_dev, int region_num,
+ pcibus_t e_phys, pcibus_t e_size,
+ int type)
+{
+ AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
+ AssignedDevRegion *region = &r_dev->v_addrs[region_num];
+ PCIRegion *real_region = &r_dev->real_device.regions[region_num];
+ int m;
+
+ DEBUG("slow map\n");
+ m = cpu_register_io_memory(slow_bar_read, slow_bar_write, region);
+ cpu_register_physical_memory(e_phys, e_size, m);
+
+ /* MSI-X MMIO page */
+ if ((e_size > 0) &&
+ real_region->base_addr <= r_dev->msix_table_addr &&
+ real_region->base_addr + real_region->size >= r_dev->msix_table_addr) {
+ int offset = r_dev->msix_table_addr - real_region->base_addr;
+
+ cpu_register_physical_memory(e_phys + offset,
+ TARGET_PAGE_SIZE, r_dev->mmio_index);
+ }
+}
+
static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
pcibus_t e_phys, pcibus_t e_size, int type)
{
@@ -381,15 +480,22 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
/* handle memory io regions */
if (cur_region->type & IORESOURCE_MEM) {
+ int slow_map = 0;
int t = cur_region->type & IORESOURCE_PREFETCH
? PCI_BASE_ADDRESS_MEM_PREFETCH
: PCI_BASE_ADDRESS_SPACE_MEMORY;
+
if (cur_region->size & 0xFFF) {
- fprintf(stderr, "Unable to assign device: PCI region %d "
- "at address 0x%llx has size 0x%x, "
- " which is not a multiple of 4K\n",
+ fprintf(stderr, "PCI region %d at address 0x%llx "
+ "has size 0x%x, which is not a multiple of 4K. "
+ "You might experience some performance hit due to that.\n",
i, (unsigned long long)cur_region->base_addr,
cur_region->size);
+ slow_map = 1;
+ }
+
+ if (slow_map && (i == PCI_ROM_SLOT)) {
+ fprintf(stderr, "ROM not aligned - can't continue\n");
return -1;
}
@@ -405,7 +511,7 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
} else {
pci_dev->v_addrs[i].u.r_virtbase =
mmap(NULL,
- (cur_region->size + 0xFFF) & 0xFFFFF000,
+ cur_region->size,
PROT_WRITE | PROT_READ, MAP_SHARED,
cur_region->resource_fd, (off_t) 0);
}
@@ -434,7 +540,8 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
pci_register_bar((PCIDevice *) pci_dev, i,
cur_region->size, t,
- assigned_dev_iomem_map);
+ slow_map ? assigned_dev_iomem_map_slow
+ : assigned_dev_iomem_map);
continue;
}
/* handle port io regions */
--
1.6.0.2
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 2/3] Split off sysfs id retrieval
2009-12-17 15:04 [PATCH 0/3] Device Assignment fixes Alexander Graf
2009-12-17 15:04 ` [PATCH 1/3] Enable non page boundary BAR device assignment Alexander Graf
@ 2009-12-17 15:04 ` Alexander Graf
2009-12-17 15:04 ` [PATCH 3/3] Inform users about busy device assignment attempt Alexander Graf
2009-12-22 10:08 ` [PATCH 0/3] Device Assignment fixes Avi Kivity
3 siblings, 0 replies; 6+ messages in thread
From: Alexander Graf @ 2009-12-17 15:04 UTC (permalink / raw)
To: kvm list; +Cc: Gleb Natapov, Michael S. Tsirkin, Muli Ben-Yehuda, Chris Wright
To retreive device and vendor ID from a PCI device, we need to read a
sysfs file. That code is currently hand written at least two times,
the later patch introducing two more calls.
So let's move that out to a function.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
hw/device-assignment.c | 66 ++++++++++++++++++++++++++++++++----------------
1 files changed, 44 insertions(+), 22 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 000fa61..566671c 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -562,14 +562,46 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
return 0;
}
+static int get_real_id(const char *devpath, const char *idname, uint16_t *val)
+{
+ FILE *f;
+ char name[128];
+ long id;
+
+ snprintf(name, sizeof(name), "%s%s", devpath, idname);
+ f = fopen(name, "r");
+ if (f == NULL) {
+ fprintf(stderr, "%s: %s: %m\n", __func__, name);
+ return -1;
+ }
+ if (fscanf(f, "%li\n", &id) == 1) {
+ *val = id;
+ } else {
+ return -1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+static int get_real_vendor_id(const char *devpath, uint16_t *val)
+{
+ return get_real_id(devpath, "vendor", val);
+}
+
+static int get_real_device_id(const char *devpath, uint16_t *val)
+{
+ return get_real_id(devpath, "device", val);
+}
+
static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
uint8_t r_dev, uint8_t r_func)
{
char dir[128], name[128];
- int fd, r = 0;
+ int fd, r = 0, v;
FILE *f;
unsigned long long start, end, size, flags;
- unsigned long id;
+ uint16_t id;
struct stat statbuf;
PCIRegion *rp;
PCIDevRegions *dev = &pci_dev->real_device;
@@ -635,31 +667,21 @@ again:
fclose(f);
- /* read and fill device ID */
- snprintf(name, sizeof(name), "%svendor", dir);
- f = fopen(name, "r");
- if (f == NULL) {
- fprintf(stderr, "%s: %s: %m\n", __func__, name);
+ /* read and fill vendor ID */
+ v = get_real_vendor_id(dir, &id);
+ if (v) {
return 1;
}
- if (fscanf(f, "%li\n", &id) == 1) {
- pci_dev->dev.config[0] = id & 0xff;
- pci_dev->dev.config[1] = (id & 0xff00) >> 8;
- }
- fclose(f);
+ pci_dev->dev.config[0] = id & 0xff;
+ pci_dev->dev.config[1] = (id & 0xff00) >> 8;
- /* read and fill vendor ID */
- snprintf(name, sizeof(name), "%sdevice", dir);
- f = fopen(name, "r");
- if (f == NULL) {
- fprintf(stderr, "%s: %s: %m\n", __func__, name);
+ /* read and fill device ID */
+ v = get_real_device_id(dir, &id);
+ if (v) {
return 1;
}
- if (fscanf(f, "%li\n", &id) == 1) {
- pci_dev->dev.config[2] = id & 0xff;
- pci_dev->dev.config[3] = (id & 0xff00) >> 8;
- }
- fclose(f);
+ pci_dev->dev.config[2] = id & 0xff;
+ pci_dev->dev.config[3] = (id & 0xff00) >> 8;
/* dealing with virtual function device */
snprintf(name, sizeof(name), "%sphysfn/", dir);
--
1.6.0.2
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 3/3] Inform users about busy device assignment attempt
2009-12-17 15:04 [PATCH 0/3] Device Assignment fixes Alexander Graf
2009-12-17 15:04 ` [PATCH 1/3] Enable non page boundary BAR device assignment Alexander Graf
2009-12-17 15:04 ` [PATCH 2/3] Split off sysfs id retrieval Alexander Graf
@ 2009-12-17 15:04 ` Alexander Graf
2009-12-22 10:08 ` [PATCH 0/3] Device Assignment fixes Avi Kivity
3 siblings, 0 replies; 6+ messages in thread
From: Alexander Graf @ 2009-12-17 15:04 UTC (permalink / raw)
To: kvm list
Cc: Gleb Natapov, Michael S. Tsirkin, Muli Ben-Yehuda, Chris Wright,
Daniel P. Berrange
When using -pcidevice on a device that is already in use by a kernel driver
all the user gets is the following (very useful) information:
Failed to assign device "04:00.0" : Device or resource busy
Failed to deassign device "04:00.0" : Invalid argument
Error initializing device pci-assign
Since I usually prefer to have my computer do the thinking for me, I figured
it might be a good idea to check and see if a device is actually used by a
driver. If so, tell the user.
So with this patch applied you get the following output:
Failed to assign device "04:00.0" : Device or resource busy
*** The driver 'igb' is occupying your device 04:00.0.
***
*** You can try the following commands to free it:
***
*** $ echo "8086 150a" > /sys/bus/pci/drivers/pci-stub/new_id
*** $ echo "0000:04:00.0" > /sys/bus/pci/drivers/igb/unbind
*** $ echo "0000:04:00.0" > /sys/bus/pci/drivers/pci-stub/bind
*** $ echo "8086 150a" > /sys/bus/pci/drivers/pci-stub/remove_id
***
Failed to deassign device "04:00.0" : Invalid argument
Error initializing device pci-assign
That should keep people like me from doing the most obvious misuses :-).
CC: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
v1 -> v2:
- add more helpful guidance thanks to Daniel Berrange
v2 -> v3:
- clear name variable before using it, thus 0-terminating the string
- fix region numbers
- use correct unbind/bind names
v3 -> v4:
- split id retrieval part
- mst comments
v4 -> v5:
- address chris' comments
---
hw/device-assignment.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 566671c..d6d44eb 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -756,6 +756,54 @@ static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
return (uint32_t)bus << 8 | (uint32_t)devfn;
}
+static void assign_failed_examine(AssignedDevice *dev)
+{
+ char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
+ uint16_t vendor_id, device_id;
+ int r;
+
+ /* XXX implement multidomain */
+ sprintf(dir, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/",
+ dev->host.bus, dev->host.dev, dev->host.func);
+
+ sprintf(name, "%sdriver", dir);
+
+ r = readlink(name, driver, sizeof(driver));
+ if ((r <= 0) || r >= sizeof(driver) || !(ns = strrchr(driver, '/'))) {
+ goto fail;
+ }
+
+ ns++;
+
+ if (get_real_vendor_id(dir, &vendor_id) ||
+ get_real_device_id(dir, &device_id)) {
+ goto fail;
+ }
+
+ fprintf(stderr, "*** The driver '%s' is occupying your device "
+ "%02x:%02x.%x.\n",
+ ns, dev->host.bus, dev->host.dev, dev->host.func);
+ fprintf(stderr, "***\n");
+ fprintf(stderr, "*** You can try the following commands to free it:\n");
+ fprintf(stderr, "***\n");
+ fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/"
+ "new_id\n", vendor_id, device_id);
+ fprintf(stderr, "*** $ echo \"0000:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
+ "%s/unbind\n",
+ dev->host.bus, dev->host.dev, dev->host.func, ns);
+ fprintf(stderr, "*** $ echo \"0000:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
+ "pci-stub/bind\n",
+ dev->host.bus, dev->host.dev, dev->host.func);
+ fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub"
+ "/remove_id\n", vendor_id, device_id);
+ fprintf(stderr, "***\n");
+
+ return;
+
+fail:
+ fprintf(stderr, "Couldn't find out why.\n");
+}
+
static int assign_device(AssignedDevice *dev)
{
struct kvm_assigned_pci_dev assigned_dev_data;
@@ -781,9 +829,18 @@ static int assign_device(AssignedDevice *dev)
#endif
r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
- if (r < 0)
- fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
+ if (r < 0) {
+ fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
dev->dev.qdev.id, strerror(-r));
+
+ switch (r) {
+ case -EBUSY:
+ assign_failed_examine(dev);
+ break;
+ default:
+ break;
+ }
+ }
return r;
}
--
1.6.0.2
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH 0/3] Device Assignment fixes
2009-12-17 15:04 [PATCH 0/3] Device Assignment fixes Alexander Graf
` (2 preceding siblings ...)
2009-12-17 15:04 ` [PATCH 3/3] Inform users about busy device assignment attempt Alexander Graf
@ 2009-12-22 10:08 ` Avi Kivity
3 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2009-12-22 10:08 UTC (permalink / raw)
To: Alexander Graf
Cc: kvm list, Gleb Natapov, Michael S. Tsirkin, Muli Ben-Yehuda,
Chris Wright
On 12/17/2009 05:04 PM, Alexander Graf wrote:
> While trying out I stumbled across several issues in the device assignment
> code.
>
> This set addresses the most major ones. Namely allowing passthrough of non page
> aligned BAR region (like on lpfc) and telling users what to do when their
> device is in use.
>
>
Applied, thanks.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 6+ messages in thread