* [PATCH] device-assignment: Use PCI I/O port sysfs resource file when available
@ 2010-07-20 22:11 Alex Williamson
2010-07-20 23:13 ` Chris Wright
2010-07-21 3:30 ` [PATCH v2] " Alex Williamson
0 siblings, 2 replies; 8+ messages in thread
From: Alex Williamson @ 2010-07-20 22:11 UTC (permalink / raw)
To: kvm; +Cc: ddutile, chrisw, alex.williamson
When supported by the host kernel, we can use read/write on the
PCI sysfs resource file for I/O port regions. This allows us to
avoid raw in/out commands and works with deprivileged guests via
libvirt. For uid 0 callers, we use in/out directly to avoid any
compatibility issues.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
Required kernel patch pending here:
http://www.spinics.net/lists/linux-pci/msg09389.html
hw/device-assignment.c | 131 ++++++++++++++++++++++++++++++++++++------------
hw/device-assignment.h | 1
2 files changed, 99 insertions(+), 33 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 2bba22f..37c1278 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -67,6 +67,28 @@ static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr)
return region->u.r_baseport + (addr - region->e_physbase);
}
+static int assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
+ uint32_t addr, int len, uint32_t *val,
+ int write)
+{
+ if (dev_region->region->resource_fd == -1)
+ return -1;
+
+ if (write) {
+ if (pwrite(dev_region->region->resource_fd, val, len,
+ (addr - dev_region->e_physbase)) != len) {
+ return -1;
+ }
+ } else {
+ if (pread(dev_region->region->resource_fd, val, len,
+ (addr - dev_region->e_physbase)) != len) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
uint32_t value)
{
@@ -77,7 +99,9 @@ static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
r_pio, (int)r_access->e_physbase,
(unsigned long)r_access->u.r_baseport, value);
- outb(value, r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 1, &value, 1) != 0) {
+ outb(value, r_pio);
+ }
}
static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
@@ -90,7 +114,9 @@ static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
r_pio, (int)r_access->e_physbase,
(unsigned long)r_access->u.r_baseport, value);
- outw(value, r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 2, &value, 1) != 0) {
+ outw(value, r_pio);
+ }
}
static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
@@ -103,7 +129,9 @@ static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
r_pio, (int)r_access->e_physbase,
(unsigned long)r_access->u.r_baseport, value);
- outl(value, r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 4, &value, 1) != 0) {
+ outl(value, r_pio);
+ }
}
static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
@@ -112,7 +140,9 @@ static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
uint32_t r_pio = guest_to_host_ioport(r_access, addr);
uint32_t value;
- value = inb(r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 1, &value, 0) != 0) {
+ value = inb(r_pio);
+ }
DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n",
r_pio, (int)r_access->e_physbase,
@@ -127,7 +157,9 @@ static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
uint32_t r_pio = guest_to_host_ioport(r_access, addr);
uint32_t value;
- value = inw(r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 2, &value, 0) != 0) {
+ value = inw(r_pio);
+ }
DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
r_pio, (int)r_access->e_physbase,
@@ -142,7 +174,9 @@ static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
uint32_t r_pio = guest_to_host_ioport(r_access, addr);
uint32_t value;
- value = inl(r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 4, &value, 0) != 0) {
+ value = inl(r_pio);
+ }
DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
r_pio, (int)r_access->e_physbase,
@@ -305,7 +339,7 @@ static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
DEBUG("e_phys=0x%" FMT_PCIBUS " r_baseport=%x type=0x%x len=%" FMT_PCIBUS " region_num=%d \n",
addr, region->u.r_baseport, type, size, region_num);
- if (first_map) {
+ if (first_map && region->region->resource_fd < 0) {
struct ioperm_data *data;
data = qemu_mallocz(sizeof(struct ioperm_data));
@@ -586,19 +620,46 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
slow_map ? assigned_dev_iomem_map_slow
: assigned_dev_iomem_map);
continue;
+ } else {
+ /* handle port io regions */
+ uint32_t val;
+ int ret;
+
+ /* Test kernel support for ioport resource read/write. Old
+ * kernels return EIO. New kernels only allow 1/2/4 byte reads
+ * so should return EINVAL for a 3 byte read */
+ ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0);
+ if (ret == 3) {
+ fprintf(stderr, "I/O port resource supports 3 byte read?!\n");
+ abort();
+ } else if (errno == EIO) {
+ fprintf(stderr,
+ "pcisysfs does not support rw ioport resource\n");
+ close(pci_dev->v_addrs[i].region->resource_fd);
+ pci_dev->v_addrs[i].region->resource_fd = -1;
+ } else if (errno != EINVAL) {
+ fprintf(stderr, "Unexpected return from ioport pread (%d) %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Root user can use direct access for compatibility */
+ if (getuid() == 0) {
+ close(pci_dev->v_addrs[i].region->resource_fd);
+ pci_dev->v_addrs[i].region->resource_fd = -1;
+ }
+ pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
+ pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
+ pci_dev->v_addrs[i].r_size = cur_region->size;
+ pci_dev->v_addrs[i].e_size = 0;
+
+ pci_register_bar((PCIDevice *) pci_dev, i,
+ cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
+ assigned_dev_ioport_map);
+
+ /* not relevant for port io */
+ pci_dev->v_addrs[i].memory_index = 0;
}
- /* handle port io regions */
- pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
- pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
- pci_dev->v_addrs[i].r_size = cur_region->size;
- pci_dev->v_addrs[i].e_size = 0;
-
- pci_register_bar((PCIDevice *) pci_dev, i,
- cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
- assigned_dev_ioport_map);
-
- /* not relevant for port io */
- pci_dev->v_addrs[i].memory_index = 0;
}
/* success */
@@ -705,20 +766,22 @@ again:
continue;
if (flags & IORESOURCE_MEM) {
flags &= ~IORESOURCE_IO;
- if (r != PCI_ROM_SLOT) {
- snprintf(name, sizeof(name), "%sresource%d", dir, r);
- fd = open(name, O_RDWR);
- if (fd == -1)
- continue;
- rp->resource_fd = fd;
- }
- } else
+ } else {
flags &= ~IORESOURCE_PREFETCH;
+ }
+ if (r != PCI_ROM_SLOT) {
+ snprintf(name, sizeof(name), "%sresource%d", dir, r);
+ fd = open(name, O_RDWR);
+ if (fd == -1)
+ continue;
+ rp->resource_fd = fd;
+ }
rp->type = flags;
rp->valid = 1;
rp->base_addr = start;
rp->size = size;
+ pci_dev->v_addrs[r].region = rp;
DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
r, rp->size, start, rp->type, rp->resource_fd);
}
@@ -780,8 +843,10 @@ static void free_assigned_device(AssignedDevice *dev)
continue;
if (pci_region->type & IORESOURCE_IO) {
- kvm_remove_ioperm_data(region->u.r_baseport, region->r_size);
- continue;
+ if (pci_region->resource_fd < 0) {
+ kvm_remove_ioperm_data(region->u.r_baseport,
+ region->r_size);
+ }
} else if (pci_region->type & IORESOURCE_MEM) {
if (region->u.r_virtbase) {
if (region->memory_index) {
@@ -795,11 +860,11 @@ static void free_assigned_device(AssignedDevice *dev)
fprintf(stderr,
"Failed to unmap assigned device region: %s\n",
strerror(errno));
- if (pci_region->resource_fd >= 0) {
- close(pci_region->resource_fd);
- }
}
- }
+ }
+ if (pci_region->resource_fd >= 0) {
+ close(pci_region->resource_fd);
+ }
}
if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 4e7fe87..9a3ea12 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -71,6 +71,7 @@ typedef struct {
int num; /* our index within v_addrs[] */
pcibus_t e_size; /* emulated size of region in bytes */
pcibus_t r_size; /* real size of region in bytes */
+ PCIRegion *region;
} AssignedDevRegion;
typedef struct AssignedDevice {
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] device-assignment: Use PCI I/O port sysfs resource file when available
2010-07-20 22:11 [PATCH] device-assignment: Use PCI I/O port sysfs resource file when available Alex Williamson
@ 2010-07-20 23:13 ` Chris Wright
2010-07-21 8:17 ` Daniel P. Berrange
2010-07-21 3:30 ` [PATCH v2] " Alex Williamson
1 sibling, 1 reply; 8+ messages in thread
From: Chris Wright @ 2010-07-20 23:13 UTC (permalink / raw)
To: Alex Williamson; +Cc: kvm, ddutile, chrisw
* Alex Williamson (alex.williamson@redhat.com) wrote:
> When supported by the host kernel, we can use read/write on the
> PCI sysfs resource file for I/O port regions. This allows us to
> avoid raw in/out commands and works with deprivileged guests via
> libvirt. For uid 0 callers, we use in/out directly to avoid any
> compatibility issues.
won't the uid 0 test fail if libvirt launches qemu with user set to
root (capabilities still get dropped)?
thanks,
-chris
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2] device-assignment: Use PCI I/O port sysfs resource file when available
2010-07-20 22:11 [PATCH] device-assignment: Use PCI I/O port sysfs resource file when available Alex Williamson
2010-07-20 23:13 ` Chris Wright
@ 2010-07-21 3:30 ` Alex Williamson
2010-07-21 14:24 ` [PATCH v3] " Alex Williamson
1 sibling, 1 reply; 8+ messages in thread
From: Alex Williamson @ 2010-07-21 3:30 UTC (permalink / raw)
To: kvm; +Cc: ddutile, chrisw, alex.williamson
When supported by the host kernel, we can use read/write on the
PCI sysfs resource file for I/O port regions. This allows us to
avoid raw in/out commands and works with deprivileged guests via
libvirt. For uid 0 callers, we use in/out directly to avoid any
compatibility issues.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
Required kernel patch pending here:
http://www.spinics.net/lists/linux-pci/msg09389.html
v2: Drop getuid() since it doesn't guarantee permissions
Don't use in/out as a fallback since we don't have permissions
Consolidate ioport read/write functions
hw/device-assignment.c | 205 ++++++++++++++++++++++++++++--------------------
hw/device-assignment.h | 1
2 files changed, 120 insertions(+), 86 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 2bba22f..2e141ac 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -62,93 +62,100 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev);
static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
-static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr)
+static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
+ uint32_t addr, int len, uint32_t *val)
{
- return region->u.r_baseport + (addr - region->e_physbase);
+ uint32_t ret = 0;
+ uint32_t offset = addr - dev_region->e_physbase;
+ int fd = dev_region->region->resource_fd;
+
+ if (fd >= 0) {
+ if (val) {
+ DEBUG("pwrite val=%x, len=%d, e_phys=%x, offset=%x\n",
+ *val, len, addr, offset);
+ if (pwrite(fd, val, len, offset) != len) {
+ fprintf(stderr, "%s - pwrite failed %s\n",
+ __func__, strerror(errno));
+ }
+ } else {
+ if (pread(fd, &ret, len, offset) != len) {
+ fprintf(stderr, "%s - pread failed %s\n",
+ __func__, strerror(errno));
+ ret = (1UL << (len * 8)) - 1;
+ }
+ DEBUG("pread ret=%x, len=%d, e_phys=%x, offset=%x\n",
+ ret, len, addr, offset);
+ }
+ } else {
+ uint32_t port = offset + dev_region->u.r_baseport;
+
+ if (val) {
+ DEBUG("out val=%x, len=%d, e_phys=%x, host=%x\n",
+ *val, len, addr, port);
+ switch (len) {
+ case 1:
+ outb(*val, port);
+ break;
+ case 2:
+ outw(*val, port);
+ break;
+ case 4:
+ outl(*val, port);
+ break;
+ }
+ } else {
+ switch (len) {
+ case 1:
+ ret = inb(port);
+ break;
+ case 2:
+ ret = inw(port);
+ break;
+ case 4:
+ ret = inl(port);
+ break;
+ }
+ DEBUG("in val=%x, len=%d, e_phys=%x, host=%x\n",
+ ret, len, addr, port);
+ }
+ }
+ return ret;
}
static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
uint32_t value)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- outb(value, r_pio);
+ assigned_dev_ioport_rw(opaque, addr, 1, &value);
+ return;
}
static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
uint32_t value)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- outw(value, r_pio);
+ assigned_dev_ioport_rw(opaque, addr, 2, &value);
+ return;
}
static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
uint32_t value)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- outl(value, r_pio);
+ assigned_dev_ioport_rw(opaque, addr, 4, &value);
+ return;
}
static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
- uint32_t value;
-
- value = inb(r_pio);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- return value;
+ return assigned_dev_ioport_rw(opaque, addr, 1, NULL);
}
static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
- uint32_t value;
-
- value = inw(r_pio);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- return value;
+ return assigned_dev_ioport_rw(opaque, addr, 2, NULL);
}
static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
- uint32_t value;
-
- value = inl(r_pio);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- return value;
+ return assigned_dev_ioport_rw(opaque, addr, 4, NULL);
}
static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
@@ -305,7 +312,7 @@ static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
DEBUG("e_phys=0x%" FMT_PCIBUS " r_baseport=%x type=0x%x len=%" FMT_PCIBUS " region_num=%d \n",
addr, region->u.r_baseport, type, size, region_num);
- if (first_map) {
+ if (first_map && region->region->resource_fd < 0) {
struct ioperm_data *data;
data = qemu_mallocz(sizeof(struct ioperm_data));
@@ -586,19 +593,41 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
slow_map ? assigned_dev_iomem_map_slow
: assigned_dev_iomem_map);
continue;
+ } else {
+ /* handle port io regions */
+ uint32_t val;
+ int ret;
+
+ /* Test kernel support for ioport resource read/write. Old
+ * kernels return EIO. New kernels only allow 1/2/4 byte reads
+ * so should return EINVAL for a 3 byte read */
+ ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0);
+ if (ret == 3) {
+ fprintf(stderr, "I/O port resource supports 3 byte read?!\n");
+ abort();
+ } else if (errno == EIO) {
+ fprintf(stderr,
+ "pcisysfs does not support rw ioport resource\n");
+ close(pci_dev->v_addrs[i].region->resource_fd);
+ pci_dev->v_addrs[i].region->resource_fd = -1;
+ } else if (errno != EINVAL) {
+ fprintf(stderr, "Unexpected return from ioport pread (%d) %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
+ pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
+ pci_dev->v_addrs[i].r_size = cur_region->size;
+ pci_dev->v_addrs[i].e_size = 0;
+
+ pci_register_bar((PCIDevice *) pci_dev, i,
+ cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
+ assigned_dev_ioport_map);
+
+ /* not relevant for port io */
+ pci_dev->v_addrs[i].memory_index = 0;
}
- /* handle port io regions */
- pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
- pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
- pci_dev->v_addrs[i].r_size = cur_region->size;
- pci_dev->v_addrs[i].e_size = 0;
-
- pci_register_bar((PCIDevice *) pci_dev, i,
- cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
- assigned_dev_ioport_map);
-
- /* not relevant for port io */
- pci_dev->v_addrs[i].memory_index = 0;
}
/* success */
@@ -705,20 +734,22 @@ again:
continue;
if (flags & IORESOURCE_MEM) {
flags &= ~IORESOURCE_IO;
- if (r != PCI_ROM_SLOT) {
- snprintf(name, sizeof(name), "%sresource%d", dir, r);
- fd = open(name, O_RDWR);
- if (fd == -1)
- continue;
- rp->resource_fd = fd;
- }
- } else
+ } else {
flags &= ~IORESOURCE_PREFETCH;
+ }
+ if (r != PCI_ROM_SLOT) {
+ snprintf(name, sizeof(name), "%sresource%d", dir, r);
+ fd = open(name, O_RDWR);
+ if (fd == -1)
+ continue;
+ rp->resource_fd = fd;
+ }
rp->type = flags;
rp->valid = 1;
rp->base_addr = start;
rp->size = size;
+ pci_dev->v_addrs[r].region = rp;
DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
r, rp->size, start, rp->type, rp->resource_fd);
}
@@ -780,8 +811,10 @@ static void free_assigned_device(AssignedDevice *dev)
continue;
if (pci_region->type & IORESOURCE_IO) {
- kvm_remove_ioperm_data(region->u.r_baseport, region->r_size);
- continue;
+ if (pci_region->resource_fd < 0) {
+ kvm_remove_ioperm_data(region->u.r_baseport,
+ region->r_size);
+ }
} else if (pci_region->type & IORESOURCE_MEM) {
if (region->u.r_virtbase) {
if (region->memory_index) {
@@ -795,11 +828,11 @@ static void free_assigned_device(AssignedDevice *dev)
fprintf(stderr,
"Failed to unmap assigned device region: %s\n",
strerror(errno));
- if (pci_region->resource_fd >= 0) {
- close(pci_region->resource_fd);
- }
}
- }
+ }
+ if (pci_region->resource_fd >= 0) {
+ close(pci_region->resource_fd);
+ }
}
if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 4e7fe87..9a3ea12 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -71,6 +71,7 @@ typedef struct {
int num; /* our index within v_addrs[] */
pcibus_t e_size; /* emulated size of region in bytes */
pcibus_t r_size; /* real size of region in bytes */
+ PCIRegion *region;
} AssignedDevRegion;
typedef struct AssignedDevice {
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH] device-assignment: Use PCI I/O port sysfs resource file when available
2010-07-20 23:13 ` Chris Wright
@ 2010-07-21 8:17 ` Daniel P. Berrange
0 siblings, 0 replies; 8+ messages in thread
From: Daniel P. Berrange @ 2010-07-21 8:17 UTC (permalink / raw)
To: Chris Wright; +Cc: Alex Williamson, kvm, ddutile
On Tue, Jul 20, 2010 at 04:13:06PM -0700, Chris Wright wrote:
> * Alex Williamson (alex.williamson@redhat.com) wrote:
> > When supported by the host kernel, we can use read/write on the
> > PCI sysfs resource file for I/O port regions. This allows us to
> > avoid raw in/out commands and works with deprivileged guests via
> > libvirt. For uid 0 callers, we use in/out directly to avoid any
> > compatibility issues.
>
> won't the uid 0 test fail if libvirt launches qemu with user set to
> root (capabilities still get dropped)?
Yes, if the kernel is doing a CAP_SYS_ADMIN check (or similar), then
testing uid==0 is definitely wrong. You'd need to test have(CAP_SYS_ADMIN)
instead.
Regards,
Daniel
--
|: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :|
|: http://libvirt.org -o- http://virt-manager.org -o- http://deltacloud.org :|
|: http://autobuild.org -o- http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v3] device-assignment: Use PCI I/O port sysfs resource file when available
2010-07-21 3:30 ` [PATCH v2] " Alex Williamson
@ 2010-07-21 14:24 ` Alex Williamson
2010-07-23 21:47 ` [PATCH v4] " Alex Williamson
0 siblings, 1 reply; 8+ messages in thread
From: Alex Williamson @ 2010-07-21 14:24 UTC (permalink / raw)
To: kvm; +Cc: ddutile, chrisw, alex.williamson
When supported by the host kernel, we can use read/write on the
PCI sysfs resource file for I/O port regions. This allows us to
avoid raw in/out commands and works with deprivileged guests via
libvirt. For uid 0 callers, we use in/out directly to avoid any
compatibility issues.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
Required kernel patch pending here:
http://www.spinics.net/lists/linux-pci/msg09389.html
v3: Fixup sysfs errno checking, anything except EINVAL should fall
back to in/out. This covers things like EPERM on the resource
file too.
v2: Drop getuid() since it doesn't guarantee permissions
Don't use in/out as a fallback since we don't have permissions
Consolidate ioport read/write functions
hw/device-assignment.c | 201 +++++++++++++++++++++++++++---------------------
hw/device-assignment.h | 1
2 files changed, 116 insertions(+), 86 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 2bba22f..c56870e 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -62,93 +62,100 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev);
static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
-static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr)
+static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
+ uint32_t addr, int len, uint32_t *val)
{
- return region->u.r_baseport + (addr - region->e_physbase);
+ uint32_t ret = 0;
+ uint32_t offset = addr - dev_region->e_physbase;
+ int fd = dev_region->region->resource_fd;
+
+ if (fd >= 0) {
+ if (val) {
+ DEBUG("pwrite val=%x, len=%d, e_phys=%x, offset=%x\n",
+ *val, len, addr, offset);
+ if (pwrite(fd, val, len, offset) != len) {
+ fprintf(stderr, "%s - pwrite failed %s\n",
+ __func__, strerror(errno));
+ }
+ } else {
+ if (pread(fd, &ret, len, offset) != len) {
+ fprintf(stderr, "%s - pread failed %s\n",
+ __func__, strerror(errno));
+ ret = (1UL << (len * 8)) - 1;
+ }
+ DEBUG("pread ret=%x, len=%d, e_phys=%x, offset=%x\n",
+ ret, len, addr, offset);
+ }
+ } else {
+ uint32_t port = offset + dev_region->u.r_baseport;
+
+ if (val) {
+ DEBUG("out val=%x, len=%d, e_phys=%x, host=%x\n",
+ *val, len, addr, port);
+ switch (len) {
+ case 1:
+ outb(*val, port);
+ break;
+ case 2:
+ outw(*val, port);
+ break;
+ case 4:
+ outl(*val, port);
+ break;
+ }
+ } else {
+ switch (len) {
+ case 1:
+ ret = inb(port);
+ break;
+ case 2:
+ ret = inw(port);
+ break;
+ case 4:
+ ret = inl(port);
+ break;
+ }
+ DEBUG("in val=%x, len=%d, e_phys=%x, host=%x\n",
+ ret, len, addr, port);
+ }
+ }
+ return ret;
}
static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
uint32_t value)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- outb(value, r_pio);
+ assigned_dev_ioport_rw(opaque, addr, 1, &value);
+ return;
}
static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
uint32_t value)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- outw(value, r_pio);
+ assigned_dev_ioport_rw(opaque, addr, 2, &value);
+ return;
}
static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
uint32_t value)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- outl(value, r_pio);
+ assigned_dev_ioport_rw(opaque, addr, 4, &value);
+ return;
}
static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
- uint32_t value;
-
- value = inb(r_pio);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- return value;
+ return assigned_dev_ioport_rw(opaque, addr, 1, NULL);
}
static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
- uint32_t value;
-
- value = inw(r_pio);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- return value;
+ return assigned_dev_ioport_rw(opaque, addr, 2, NULL);
}
static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
- uint32_t value;
-
- value = inl(r_pio);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- return value;
+ return assigned_dev_ioport_rw(opaque, addr, 4, NULL);
}
static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
@@ -305,7 +312,7 @@ static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
DEBUG("e_phys=0x%" FMT_PCIBUS " r_baseport=%x type=0x%x len=%" FMT_PCIBUS " region_num=%d \n",
addr, region->u.r_baseport, type, size, region_num);
- if (first_map) {
+ if (first_map && region->region->resource_fd < 0) {
struct ioperm_data *data;
data = qemu_mallocz(sizeof(struct ioperm_data));
@@ -586,19 +593,37 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
slow_map ? assigned_dev_iomem_map_slow
: assigned_dev_iomem_map);
continue;
+ } else {
+ /* handle port io regions */
+ uint32_t val;
+ int ret;
+
+ /* Test kernel support for ioport resource read/write. Old
+ * kernels return EIO. New kernels only allow 1/2/4 byte reads
+ * so should return EINVAL for a 3 byte read */
+ ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0);
+ if (ret == 3) {
+ fprintf(stderr, "I/O port resource supports 3 byte read?!\n");
+ abort();
+ } else if (errno != EINVAL) {
+ fprintf(stderr, "Using raw in/out ioport access (sysfs - %s)\n",
+ strerror(errno));
+ close(pci_dev->v_addrs[i].region->resource_fd);
+ pci_dev->v_addrs[i].region->resource_fd = -1;
+ }
+
+ pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
+ pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
+ pci_dev->v_addrs[i].r_size = cur_region->size;
+ pci_dev->v_addrs[i].e_size = 0;
+
+ pci_register_bar((PCIDevice *) pci_dev, i,
+ cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
+ assigned_dev_ioport_map);
+
+ /* not relevant for port io */
+ pci_dev->v_addrs[i].memory_index = 0;
}
- /* handle port io regions */
- pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
- pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
- pci_dev->v_addrs[i].r_size = cur_region->size;
- pci_dev->v_addrs[i].e_size = 0;
-
- pci_register_bar((PCIDevice *) pci_dev, i,
- cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
- assigned_dev_ioport_map);
-
- /* not relevant for port io */
- pci_dev->v_addrs[i].memory_index = 0;
}
/* success */
@@ -705,20 +730,22 @@ again:
continue;
if (flags & IORESOURCE_MEM) {
flags &= ~IORESOURCE_IO;
- if (r != PCI_ROM_SLOT) {
- snprintf(name, sizeof(name), "%sresource%d", dir, r);
- fd = open(name, O_RDWR);
- if (fd == -1)
- continue;
- rp->resource_fd = fd;
- }
- } else
+ } else {
flags &= ~IORESOURCE_PREFETCH;
+ }
+ if (r != PCI_ROM_SLOT) {
+ snprintf(name, sizeof(name), "%sresource%d", dir, r);
+ fd = open(name, O_RDWR);
+ if (fd == -1)
+ continue;
+ rp->resource_fd = fd;
+ }
rp->type = flags;
rp->valid = 1;
rp->base_addr = start;
rp->size = size;
+ pci_dev->v_addrs[r].region = rp;
DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
r, rp->size, start, rp->type, rp->resource_fd);
}
@@ -780,8 +807,10 @@ static void free_assigned_device(AssignedDevice *dev)
continue;
if (pci_region->type & IORESOURCE_IO) {
- kvm_remove_ioperm_data(region->u.r_baseport, region->r_size);
- continue;
+ if (pci_region->resource_fd < 0) {
+ kvm_remove_ioperm_data(region->u.r_baseport,
+ region->r_size);
+ }
} else if (pci_region->type & IORESOURCE_MEM) {
if (region->u.r_virtbase) {
if (region->memory_index) {
@@ -795,11 +824,11 @@ static void free_assigned_device(AssignedDevice *dev)
fprintf(stderr,
"Failed to unmap assigned device region: %s\n",
strerror(errno));
- if (pci_region->resource_fd >= 0) {
- close(pci_region->resource_fd);
- }
}
- }
+ }
+ if (pci_region->resource_fd >= 0) {
+ close(pci_region->resource_fd);
+ }
}
if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 4e7fe87..9a3ea12 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -71,6 +71,7 @@ typedef struct {
int num; /* our index within v_addrs[] */
pcibus_t e_size; /* emulated size of region in bytes */
pcibus_t r_size; /* real size of region in bytes */
+ PCIRegion *region;
} AssignedDevRegion;
typedef struct AssignedDevice {
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v4] device-assignment: Use PCI I/O port sysfs resource file when available
2010-07-21 14:24 ` [PATCH v3] " Alex Williamson
@ 2010-07-23 21:47 ` Alex Williamson
2010-07-23 23:01 ` Chris Wright
2010-07-27 20:37 ` Marcelo Tosatti
0 siblings, 2 replies; 8+ messages in thread
From: Alex Williamson @ 2010-07-23 21:47 UTC (permalink / raw)
To: kvm; +Cc: ddutile, chrisw, alex.williamson
When supported by the host kernel, we can use read/write on the
PCI sysfs resource file for I/O port regions. This allows us to
avoid raw in/out commands and works with deprivileged guests via
libvirt.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
Required kernel patch pending here:
http://www.spinics.net/lists/linux-pci/msg09389.html
v4: Remove commit sentence about uid since we removed the code
in v2. Identical patch to v3. Also note preliminary
agreement to include kernel patch:
http://www.spinics.net/lists/linux-pci/msg09412.html
v3: Fixup sysfs errno checking, anything except EINVAL should fall
back to in/out. This covers things like EPERM on the resource
file too.
v2: Drop getuid() since it doesn't guarantee permissions
Don't use in/out as a fallback since we don't have permissions
Consolidate ioport read/write functions
hw/device-assignment.c | 201 +++++++++++++++++++++++++++---------------------
hw/device-assignment.h | 1
2 files changed, 116 insertions(+), 86 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 2bba22f..c56870e 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -62,93 +62,100 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev);
static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
-static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr)
+static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
+ uint32_t addr, int len, uint32_t *val)
{
- return region->u.r_baseport + (addr - region->e_physbase);
+ uint32_t ret = 0;
+ uint32_t offset = addr - dev_region->e_physbase;
+ int fd = dev_region->region->resource_fd;
+
+ if (fd >= 0) {
+ if (val) {
+ DEBUG("pwrite val=%x, len=%d, e_phys=%x, offset=%x\n",
+ *val, len, addr, offset);
+ if (pwrite(fd, val, len, offset) != len) {
+ fprintf(stderr, "%s - pwrite failed %s\n",
+ __func__, strerror(errno));
+ }
+ } else {
+ if (pread(fd, &ret, len, offset) != len) {
+ fprintf(stderr, "%s - pread failed %s\n",
+ __func__, strerror(errno));
+ ret = (1UL << (len * 8)) - 1;
+ }
+ DEBUG("pread ret=%x, len=%d, e_phys=%x, offset=%x\n",
+ ret, len, addr, offset);
+ }
+ } else {
+ uint32_t port = offset + dev_region->u.r_baseport;
+
+ if (val) {
+ DEBUG("out val=%x, len=%d, e_phys=%x, host=%x\n",
+ *val, len, addr, port);
+ switch (len) {
+ case 1:
+ outb(*val, port);
+ break;
+ case 2:
+ outw(*val, port);
+ break;
+ case 4:
+ outl(*val, port);
+ break;
+ }
+ } else {
+ switch (len) {
+ case 1:
+ ret = inb(port);
+ break;
+ case 2:
+ ret = inw(port);
+ break;
+ case 4:
+ ret = inl(port);
+ break;
+ }
+ DEBUG("in val=%x, len=%d, e_phys=%x, host=%x\n",
+ ret, len, addr, port);
+ }
+ }
+ return ret;
}
static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
uint32_t value)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- outb(value, r_pio);
+ assigned_dev_ioport_rw(opaque, addr, 1, &value);
+ return;
}
static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
uint32_t value)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- outw(value, r_pio);
+ assigned_dev_ioport_rw(opaque, addr, 2, &value);
+ return;
}
static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
uint32_t value)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- outl(value, r_pio);
+ assigned_dev_ioport_rw(opaque, addr, 4, &value);
+ return;
}
static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
- uint32_t value;
-
- value = inb(r_pio);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- return value;
+ return assigned_dev_ioport_rw(opaque, addr, 1, NULL);
}
static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
- uint32_t value;
-
- value = inw(r_pio);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- return value;
+ return assigned_dev_ioport_rw(opaque, addr, 2, NULL);
}
static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
{
- AssignedDevRegion *r_access = opaque;
- uint32_t r_pio = guest_to_host_ioport(r_access, addr);
- uint32_t value;
-
- value = inl(r_pio);
-
- DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
- r_pio, (int)r_access->e_physbase,
- (unsigned long)r_access->u.r_baseport, value);
-
- return value;
+ return assigned_dev_ioport_rw(opaque, addr, 4, NULL);
}
static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
@@ -305,7 +312,7 @@ static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
DEBUG("e_phys=0x%" FMT_PCIBUS " r_baseport=%x type=0x%x len=%" FMT_PCIBUS " region_num=%d \n",
addr, region->u.r_baseport, type, size, region_num);
- if (first_map) {
+ if (first_map && region->region->resource_fd < 0) {
struct ioperm_data *data;
data = qemu_mallocz(sizeof(struct ioperm_data));
@@ -586,19 +593,37 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
slow_map ? assigned_dev_iomem_map_slow
: assigned_dev_iomem_map);
continue;
+ } else {
+ /* handle port io regions */
+ uint32_t val;
+ int ret;
+
+ /* Test kernel support for ioport resource read/write. Old
+ * kernels return EIO. New kernels only allow 1/2/4 byte reads
+ * so should return EINVAL for a 3 byte read */
+ ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0);
+ if (ret == 3) {
+ fprintf(stderr, "I/O port resource supports 3 byte read?!\n");
+ abort();
+ } else if (errno != EINVAL) {
+ fprintf(stderr, "Using raw in/out ioport access (sysfs - %s)\n",
+ strerror(errno));
+ close(pci_dev->v_addrs[i].region->resource_fd);
+ pci_dev->v_addrs[i].region->resource_fd = -1;
+ }
+
+ pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
+ pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
+ pci_dev->v_addrs[i].r_size = cur_region->size;
+ pci_dev->v_addrs[i].e_size = 0;
+
+ pci_register_bar((PCIDevice *) pci_dev, i,
+ cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
+ assigned_dev_ioport_map);
+
+ /* not relevant for port io */
+ pci_dev->v_addrs[i].memory_index = 0;
}
- /* handle port io regions */
- pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
- pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
- pci_dev->v_addrs[i].r_size = cur_region->size;
- pci_dev->v_addrs[i].e_size = 0;
-
- pci_register_bar((PCIDevice *) pci_dev, i,
- cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
- assigned_dev_ioport_map);
-
- /* not relevant for port io */
- pci_dev->v_addrs[i].memory_index = 0;
}
/* success */
@@ -705,20 +730,22 @@ again:
continue;
if (flags & IORESOURCE_MEM) {
flags &= ~IORESOURCE_IO;
- if (r != PCI_ROM_SLOT) {
- snprintf(name, sizeof(name), "%sresource%d", dir, r);
- fd = open(name, O_RDWR);
- if (fd == -1)
- continue;
- rp->resource_fd = fd;
- }
- } else
+ } else {
flags &= ~IORESOURCE_PREFETCH;
+ }
+ if (r != PCI_ROM_SLOT) {
+ snprintf(name, sizeof(name), "%sresource%d", dir, r);
+ fd = open(name, O_RDWR);
+ if (fd == -1)
+ continue;
+ rp->resource_fd = fd;
+ }
rp->type = flags;
rp->valid = 1;
rp->base_addr = start;
rp->size = size;
+ pci_dev->v_addrs[r].region = rp;
DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
r, rp->size, start, rp->type, rp->resource_fd);
}
@@ -780,8 +807,10 @@ static void free_assigned_device(AssignedDevice *dev)
continue;
if (pci_region->type & IORESOURCE_IO) {
- kvm_remove_ioperm_data(region->u.r_baseport, region->r_size);
- continue;
+ if (pci_region->resource_fd < 0) {
+ kvm_remove_ioperm_data(region->u.r_baseport,
+ region->r_size);
+ }
} else if (pci_region->type & IORESOURCE_MEM) {
if (region->u.r_virtbase) {
if (region->memory_index) {
@@ -795,11 +824,11 @@ static void free_assigned_device(AssignedDevice *dev)
fprintf(stderr,
"Failed to unmap assigned device region: %s\n",
strerror(errno));
- if (pci_region->resource_fd >= 0) {
- close(pci_region->resource_fd);
- }
}
- }
+ }
+ if (pci_region->resource_fd >= 0) {
+ close(pci_region->resource_fd);
+ }
}
if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 4e7fe87..9a3ea12 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -71,6 +71,7 @@ typedef struct {
int num; /* our index within v_addrs[] */
pcibus_t e_size; /* emulated size of region in bytes */
pcibus_t r_size; /* real size of region in bytes */
+ PCIRegion *region;
} AssignedDevRegion;
typedef struct AssignedDevice {
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH v4] device-assignment: Use PCI I/O port sysfs resource file when available
2010-07-23 21:47 ` [PATCH v4] " Alex Williamson
@ 2010-07-23 23:01 ` Chris Wright
2010-07-27 20:37 ` Marcelo Tosatti
1 sibling, 0 replies; 8+ messages in thread
From: Chris Wright @ 2010-07-23 23:01 UTC (permalink / raw)
To: Alex Williamson; +Cc: kvm, ddutile, chrisw
* Alex Williamson (alex.williamson@redhat.com) wrote:
> When supported by the host kernel, we can use read/write on the
> PCI sysfs resource file for I/O port regions. This allows us to
> avoid raw in/out commands and works with deprivileged guests via
> libvirt.
>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Chris Wright <chrisw@redhat.com>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v4] device-assignment: Use PCI I/O port sysfs resource file when available
2010-07-23 21:47 ` [PATCH v4] " Alex Williamson
2010-07-23 23:01 ` Chris Wright
@ 2010-07-27 20:37 ` Marcelo Tosatti
1 sibling, 0 replies; 8+ messages in thread
From: Marcelo Tosatti @ 2010-07-27 20:37 UTC (permalink / raw)
To: Alex Williamson; +Cc: kvm, ddutile, chrisw
On Fri, Jul 23, 2010 at 03:47:18PM -0600, Alex Williamson wrote:
> When supported by the host kernel, we can use read/write on the
> PCI sysfs resource file for I/O port regions. This allows us to
> avoid raw in/out commands and works with deprivileged guests via
> libvirt.
>
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
>
> Required kernel patch pending here:
> http://www.spinics.net/lists/linux-pci/msg09389.html
>
> v4: Remove commit sentence about uid since we removed the code
> in v2. Identical patch to v3. Also note preliminary
> agreement to include kernel patch:
> http://www.spinics.net/lists/linux-pci/msg09412.html
>
> v3: Fixup sysfs errno checking, anything except EINVAL should fall
> back to in/out. This covers things like EPERM on the resource
> file too.
>
> v2: Drop getuid() since it doesn't guarantee permissions
> Don't use in/out as a fallback since we don't have permissions
> Consolidate ioport read/write functions
>
> hw/device-assignment.c | 201 +++++++++++++++++++++++++++---------------------
> hw/device-assignment.h | 1
> 2 files changed, 116 insertions(+), 86 deletions(-)
Applied, thanks.
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2010-07-27 21:22 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-07-20 22:11 [PATCH] device-assignment: Use PCI I/O port sysfs resource file when available Alex Williamson
2010-07-20 23:13 ` Chris Wright
2010-07-21 8:17 ` Daniel P. Berrange
2010-07-21 3:30 ` [PATCH v2] " Alex Williamson
2010-07-21 14:24 ` [PATCH v3] " Alex Williamson
2010-07-23 21:47 ` [PATCH v4] " Alex Williamson
2010-07-23 23:01 ` Chris Wright
2010-07-27 20:37 ` Marcelo Tosatti
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox