From: Alex Williamson <alex.williamson@redhat.com>
To: kvm@vger.kernel.org
Cc: ddutile@redhat.com, chrisw@redhat.com, alex.williamson@redhat.com
Subject: [PATCH] device-assignment: Use PCI I/O port sysfs resource file when available
Date: Tue, 20 Jul 2010 16:11:16 -0600 [thread overview]
Message-ID: <20100720221045.3576.3435.stgit@localhost6.localdomain6> (raw)
When supported by the host kernel, we can use pread/pwrite on the
PCI sysfs resource file to access I/O port regions. This lets us
avoid raw in/out instructions, so device assignment also works for
deprivileged guests started via libvirt. For uid 0 callers, we keep
using in/out directly to avoid any compatibility issues.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
Required kernel patch pending here:
http://www.spinics.net/lists/linux-pci/msg09389.html
hw/device-assignment.c | 131 ++++++++++++++++++++++++++++++++++++------------
hw/device-assignment.h | 1
2 files changed, 99 insertions(+), 33 deletions(-)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 2bba22f..37c1278 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -67,6 +67,28 @@ static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr)
return region->u.r_baseport + (addr - region->e_physbase);
}
+static int assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
+ uint32_t addr, int len, uint32_t *val,
+ int write)
+{
+ if (dev_region->region->resource_fd == -1)
+ return -1;
+
+ if (write) {
+ if (pwrite(dev_region->region->resource_fd, val, len,
+ (addr - dev_region->e_physbase)) != len) {
+ return -1;
+ }
+ } else {
+ if (pread(dev_region->region->resource_fd, val, len,
+ (addr - dev_region->e_physbase)) != len) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
uint32_t value)
{
@@ -77,7 +99,9 @@ static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
r_pio, (int)r_access->e_physbase,
(unsigned long)r_access->u.r_baseport, value);
- outb(value, r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 1, &value, 1) != 0) {
+ outb(value, r_pio);
+ }
}
static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
@@ -90,7 +114,9 @@ static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
r_pio, (int)r_access->e_physbase,
(unsigned long)r_access->u.r_baseport, value);
- outw(value, r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 2, &value, 1) != 0) {
+ outw(value, r_pio);
+ }
}
static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
@@ -103,7 +129,9 @@ static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
r_pio, (int)r_access->e_physbase,
(unsigned long)r_access->u.r_baseport, value);
- outl(value, r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 4, &value, 1) != 0) {
+ outl(value, r_pio);
+ }
}
static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
@@ -112,7 +140,9 @@ static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
uint32_t r_pio = guest_to_host_ioport(r_access, addr);
uint32_t value;
- value = inb(r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 1, &value, 0) != 0) {
+ value = inb(r_pio);
+ }
DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n",
r_pio, (int)r_access->e_physbase,
@@ -127,7 +157,9 @@ static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
uint32_t r_pio = guest_to_host_ioport(r_access, addr);
uint32_t value;
- value = inw(r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 2, &value, 0) != 0) {
+ value = inw(r_pio);
+ }
DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
r_pio, (int)r_access->e_physbase,
@@ -142,7 +174,9 @@ static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
uint32_t r_pio = guest_to_host_ioport(r_access, addr);
uint32_t value;
- value = inl(r_pio);
+ if (assigned_dev_ioport_rw(r_access, addr, 4, &value, 0) != 0) {
+ value = inl(r_pio);
+ }
DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
r_pio, (int)r_access->e_physbase,
@@ -305,7 +339,7 @@ static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
DEBUG("e_phys=0x%" FMT_PCIBUS " r_baseport=%x type=0x%x len=%" FMT_PCIBUS " region_num=%d \n",
addr, region->u.r_baseport, type, size, region_num);
- if (first_map) {
+ if (first_map && region->region->resource_fd < 0) {
struct ioperm_data *data;
data = qemu_mallocz(sizeof(struct ioperm_data));
@@ -586,19 +620,46 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
slow_map ? assigned_dev_iomem_map_slow
: assigned_dev_iomem_map);
continue;
+ } else {
+ /* handle port io regions */
+ uint32_t val;
+ int ret;
+
+ /* Test kernel support for ioport resource read/write. Old
+ * kernels return EIO. New kernels only allow 1/2/4 byte reads
+ * so should return EINVAL for a 3 byte read */
+ ret = pread(pci_dev->v_addrs[i].region->resource_fd, &val, 3, 0);
+ if (ret == 3) {
+ fprintf(stderr, "I/O port resource supports 3 byte read?!\n");
+ abort();
+ } else if (errno == EIO) {
+ fprintf(stderr,
+ "pcisysfs does not support rw ioport resource\n");
+ close(pci_dev->v_addrs[i].region->resource_fd);
+ pci_dev->v_addrs[i].region->resource_fd = -1;
+ } else if (errno != EINVAL) {
+ fprintf(stderr, "Unexpected return from ioport pread (%d) %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Root user can use direct access for compatibility */
+ if (getuid() == 0) {
+ close(pci_dev->v_addrs[i].region->resource_fd);
+ pci_dev->v_addrs[i].region->resource_fd = -1;
+ }
+ pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
+ pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
+ pci_dev->v_addrs[i].r_size = cur_region->size;
+ pci_dev->v_addrs[i].e_size = 0;
+
+ pci_register_bar((PCIDevice *) pci_dev, i,
+ cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
+ assigned_dev_ioport_map);
+
+ /* not relevant for port io */
+ pci_dev->v_addrs[i].memory_index = 0;
}
- /* handle port io regions */
- pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
- pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
- pci_dev->v_addrs[i].r_size = cur_region->size;
- pci_dev->v_addrs[i].e_size = 0;
-
- pci_register_bar((PCIDevice *) pci_dev, i,
- cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
- assigned_dev_ioport_map);
-
- /* not relevant for port io */
- pci_dev->v_addrs[i].memory_index = 0;
}
/* success */
@@ -705,20 +766,22 @@ again:
continue;
if (flags & IORESOURCE_MEM) {
flags &= ~IORESOURCE_IO;
- if (r != PCI_ROM_SLOT) {
- snprintf(name, sizeof(name), "%sresource%d", dir, r);
- fd = open(name, O_RDWR);
- if (fd == -1)
- continue;
- rp->resource_fd = fd;
- }
- } else
+ } else {
flags &= ~IORESOURCE_PREFETCH;
+ }
+ if (r != PCI_ROM_SLOT) {
+ snprintf(name, sizeof(name), "%sresource%d", dir, r);
+ fd = open(name, O_RDWR);
+ if (fd == -1)
+ continue;
+ rp->resource_fd = fd;
+ }
rp->type = flags;
rp->valid = 1;
rp->base_addr = start;
rp->size = size;
+ pci_dev->v_addrs[r].region = rp;
DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
r, rp->size, start, rp->type, rp->resource_fd);
}
@@ -780,8 +843,10 @@ static void free_assigned_device(AssignedDevice *dev)
continue;
if (pci_region->type & IORESOURCE_IO) {
- kvm_remove_ioperm_data(region->u.r_baseport, region->r_size);
- continue;
+ if (pci_region->resource_fd < 0) {
+ kvm_remove_ioperm_data(region->u.r_baseport,
+ region->r_size);
+ }
} else if (pci_region->type & IORESOURCE_MEM) {
if (region->u.r_virtbase) {
if (region->memory_index) {
@@ -795,11 +860,11 @@ static void free_assigned_device(AssignedDevice *dev)
fprintf(stderr,
"Failed to unmap assigned device region: %s\n",
strerror(errno));
- if (pci_region->resource_fd >= 0) {
- close(pci_region->resource_fd);
- }
}
- }
+ }
+ if (pci_region->resource_fd >= 0) {
+ close(pci_region->resource_fd);
+ }
}
if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 4e7fe87..9a3ea12 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -71,6 +71,7 @@ typedef struct {
int num; /* our index within v_addrs[] */
pcibus_t e_size; /* emulated size of region in bytes */
pcibus_t r_size; /* real size of region in bytes */
+ PCIRegion *region;
} AssignedDevRegion;
typedef struct AssignedDevice {
next reply other threads:[~2010-07-20 22:11 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-07-20 22:11 Alex Williamson [this message]
2010-07-20 23:13 ` [PATCH] device-assignment: Use PCI I/O port sysfs resource file when available Chris Wright
2010-07-21 8:17 ` Daniel P. Berrange
2010-07-21 3:30 ` [PATCH v2] " Alex Williamson
2010-07-21 14:24 ` [PATCH v3] " Alex Williamson
2010-07-23 21:47 ` [PATCH v4] " Alex Williamson
2010-07-23 23:01 ` Chris Wright
2010-07-27 20:37 ` Marcelo Tosatti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100720221045.3576.3435.stgit@localhost6.localdomain6 \
--to=alex.williamson@redhat.com \
--cc=chrisw@redhat.com \
--cc=ddutile@redhat.com \
--cc=kvm@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.