* [PATCH v3] IO: Intelligent device lookup on bus
@ 2011-07-24 6:15 Sasha Levin
2011-07-27 11:35 ` Avi Kivity
0 siblings, 1 reply; 4+ messages in thread
From: Sasha Levin @ 2011-07-24 6:15 UTC (permalink / raw)
To: kvm; +Cc: Sasha Levin, Avi Kivity, Marcelo Tosatti
Currently the method of dealing with an IO operation on a bus (PIO/MMIO)
is to call the read or write callback for each device registered
on the bus until we find a device which handles it.
Since the number of devices on a bus can be significant due to ioeventfds
and coalesced MMIO zones, this leads to a lot of overhead on each IO
operation.
Instead of registering devices, we now register ranges which points to
a device. Lookup is done using an efficient bsearch instead of a linear
search.
Performance test was conducted by comparing exit count per second with
200 ioeventfds created on one byte and the guest is trying to access a
different byte continuously (triggering usermode exits).
Before the patch the guest has achieved 259k exits per second, after the
patch the guest does 274k exits per second.
Changes in V3:
- Small fix in error path of i8259.
Changes in V2:
- Only register a device once.
- Support for multiple devices using same range.
- Added performance benchmark.
- Using bsearch() and sort() found in /lib/.
Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
---
arch/x86/kvm/i8254.c | 5 +-
arch/x86/kvm/i8259.c | 56 ++++++++++++++++++----
arch/x86/kvm/irq.h | 4 +-
arch/x86/kvm/x86.c | 6 ++-
include/linux/kvm_host.h | 18 ++++----
virt/kvm/coalesced_mmio.c | 3 +-
virt/kvm/eventfd.c | 3 +-
virt/kvm/ioapic.c | 3 +-
virt/kvm/kvm_main.c | 112 ++++++++++++++++++++++++++++++++++++++++-----
9 files changed, 171 insertions(+), 39 deletions(-)
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index efad723..094e057 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -713,14 +713,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
kvm_iodevice_init(&pit->dev, &pit_dev_ops);
- ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
+ ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, KVM_PIT_MEM_LENGTH, &pit->dev);
if (ret < 0)
goto fail;
if (flags & KVM_PIT_SPEAKER_DUMMY) {
kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
- &pit->speaker_dev);
+ KVM_SPEAKER_BASE_ADDRESS, 4,
+ &pit->speaker_dev);
if (ret < 0)
goto fail_unregister;
}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 19fe855..9e18698 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -459,15 +459,27 @@ static int picdev_in_range(gpa_t addr)
}
}
-static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
+static inline struct kvm_pic *to_pic(struct kvm_io_device *dev, gpa_t addr)
{
- return container_of(dev, struct kvm_pic, dev);
+ switch (addr) {
+ case 0x20:
+ case 0x21:
+ return container_of(dev, struct kvm_pic, dev_master);
+ case 0xa0:
+ case 0xa1:
+ return container_of(dev, struct kvm_pic, dev_slave);
+ case 0x4d0:
+ case 0x4d1:
+ return container_of(dev, struct kvm_pic, dev_eclr);
+ }
+
+ return NULL;
}
static int picdev_write(struct kvm_io_device *this,
gpa_t addr, int len, const void *val)
{
- struct kvm_pic *s = to_pic(this);
+ struct kvm_pic *s = to_pic(this, addr);
unsigned char data = *(unsigned char *)val;
if (!picdev_in_range(addr))
return -EOPNOTSUPP;
@@ -497,7 +509,7 @@ static int picdev_write(struct kvm_io_device *this,
static int picdev_read(struct kvm_io_device *this,
gpa_t addr, int len, void *val)
{
- struct kvm_pic *s = to_pic(this);
+ struct kvm_pic *s = to_pic(this, addr);
unsigned char data = 0;
if (!picdev_in_range(addr))
return -EOPNOTSUPP;
@@ -560,16 +572,36 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
/*
* Initialize PIO device
*/
- kvm_iodevice_init(&s->dev, &picdev_ops);
+ kvm_iodevice_init(&s->dev_master, &picdev_ops);
+ kvm_iodevice_init(&s->dev_slave, &picdev_ops);
+ kvm_iodevice_init(&s->dev_eclr, &picdev_ops);
mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
+ ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2, &s->dev_master);
+ if (ret < 0)
+ goto fail_unlock;
+
+ ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave);
+ if (ret < 0)
+ goto fail_unlock;
+
+ ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
+ if (ret < 0)
+ goto fail_unlock;
+
mutex_unlock(&kvm->slots_lock);
- if (ret < 0) {
- kfree(s);
- return NULL;
- }
return s;
+
+fail_unlock:
+
+ mutex_unlock(&kvm->slots_lock);
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master);
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave);
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_eclr);
+
+ kfree(s);
+
+ return NULL;
}
void kvm_destroy_pic(struct kvm *kvm)
@@ -577,7 +609,9 @@ void kvm_destroy_pic(struct kvm *kvm)
struct kvm_pic *vpic = kvm->arch.vpic;
if (vpic) {
- kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master);
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
kvm->arch.vpic = NULL;
kfree(vpic);
}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 53e2d08..2086f2b 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -66,7 +66,9 @@ struct kvm_pic {
struct kvm *kvm;
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
int output; /* intr from master PIC */
- struct kvm_io_device dev;
+ struct kvm_io_device dev_master;
+ struct kvm_io_device dev_slave;
+ struct kvm_io_device dev_eclr;
void (*ack_notifier)(void *opaque, int irq);
unsigned long irq_states[16];
};
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e80f0d7..4cea9cf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3561,7 +3561,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (r) {
mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
- &vpic->dev);
+ &vpic->dev_master);
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
+ &vpic->dev_slave);
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
+ &vpic->dev_eclr);
mutex_unlock(&kvm->slots_lock);
kfree(vpic);
goto create_irqchip_unlock;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ff4d406..d0e42f3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -55,16 +55,16 @@ struct kvm;
struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;
-/*
- * It would be nice to use something smarter than a linear search, TBD...
- * Thankfully we dont expect many devices to register (famous last words :),
- * so until then it will suffice. At least its abstracted so we can change
- * in one place.
- */
+struct kvm_io_range {
+ gpa_t addr;
+ int len;
+ struct kvm_io_device *dev;
+};
+
struct kvm_io_bus {
int dev_count;
#define NR_IOBUS_DEVS 300
- struct kvm_io_device *devs[NR_IOBUS_DEVS];
+ struct kvm_io_range range[NR_IOBUS_DEVS];
};
enum kvm_bus {
@@ -77,8 +77,8 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val);
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
void *val);
-int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev);
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+ int len, struct kvm_io_device *dev);
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev);
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 2316ec1..a6ec206 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -141,7 +141,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
dev->zone = *zone;
mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr,
+ zone->size, &dev->dev);
if (ret < 0)
goto out_free_dev;
list_add_tail(&dev->list, &kvm->coalesced_zones);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 73358d2..f59c1e8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
kvm_iodevice_init(&p->dev, &ioeventfd_ops);
- ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
+ ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
+ &p->dev);
if (ret < 0)
goto unlock_fail;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 8df1ca1..86aeea1 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -394,7 +394,8 @@ int kvm_ioapic_init(struct kvm *kvm)
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
ioapic->kvm = kvm;
mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
+ IOAPIC_MEM_LENGTH, &ioapic->dev);
mutex_unlock(&kvm->slots_lock);
if (ret < 0) {
kvm->arch.vioapic = NULL;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index aefdda3..0c228f4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,8 @@
#include <linux/srcu.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/bsearch.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -2391,24 +2393,94 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
int i;
for (i = 0; i < bus->dev_count; i++) {
- struct kvm_io_device *pos = bus->devs[i];
+ struct kvm_io_device *pos = bus->range[i].dev;
kvm_iodevice_destructor(pos);
}
kfree(bus);
}
+int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
+{
+ const struct kvm_io_range *r1 = p1;
+ const struct kvm_io_range *r2 = p2;
+
+ if (r1->addr < r2->addr)
+ return -1;
+ if (r1->addr + r1->len > r2->addr + r2->len)
+ return 1;
+ return 0;
+}
+
+int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
+ gpa_t addr, int len)
+{
+ int i;
+
+ if (bus->dev_count == NR_IOBUS_DEVS)
+ return -ENOSPC;
+
+ bus->range[bus->dev_count++] = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ .dev = dev,
+ };
+
+ sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
+ kvm_io_bus_sort_cmp, NULL);
+
+ return 0;
+}
+
+int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
+ gpa_t addr, int len)
+{
+ struct kvm_io_range *range, key;
+ int off;
+
+ key = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ };
+
+ range = bsearch(&key, bus->range, bus->dev_count,
+ sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp);
+ if (range == NULL)
+ return -ENOENT;
+
+ off = range - bus->range;
+
+ while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0)
+ off--;
+
+ return off;
+}
+
/* kvm_io_bus_write - called under kvm->slots_lock */
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
- int i;
+ int idx;
struct kvm_io_bus *bus;
+ struct kvm_io_range range;
+
+ range = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ };
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
- for (i = 0; i < bus->dev_count; i++)
- if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
+ idx = kvm_io_bus_get_first_dev(bus, addr, len);
+ if (idx < 0)
+ return -EOPNOTSUPP;
+
+ while (idx < bus->dev_count &&
+ kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+ if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val))
return 0;
+ idx++;
+ }
+
return -EOPNOTSUPP;
}
@@ -2416,19 +2488,33 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, void *val)
{
- int i;
+ int idx;
struct kvm_io_bus *bus;
+ struct kvm_io_range range;
+
+ range = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ };
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
- for (i = 0; i < bus->dev_count; i++)
- if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
+ idx = kvm_io_bus_get_first_dev(bus, addr, len);
+ if (idx < 0)
+ return -EOPNOTSUPP;
+
+ while (idx < bus->dev_count &&
+ kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+ if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val))
return 0;
+ idx++;
+ }
+
return -EOPNOTSUPP;
}
/* Caller must hold slots_lock. */
-int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev)
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+ int len, struct kvm_io_device *dev)
{
struct kvm_io_bus *new_bus, *bus;
@@ -2440,7 +2526,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
if (!new_bus)
return -ENOMEM;
memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
- new_bus->devs[new_bus->dev_count++] = dev;
+ kvm_io_bus_insert_dev(new_bus, dev, addr, len);
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
@@ -2464,9 +2550,11 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
r = -ENOENT;
for (i = 0; i < new_bus->dev_count; i++)
- if (new_bus->devs[i] == dev) {
+ if (new_bus->range[i].dev == dev) {
r = 0;
- new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
+ new_bus->range[i] = new_bus->range[--new_bus->dev_count];
+ sort(new_bus->range, new_bus->dev_count, sizeof(struct kvm_io_range),
+ kvm_io_bus_sort_cmp, NULL);
break;
}
--
1.7.6
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v3] IO: Intelligent device lookup on bus
2011-07-24 6:15 [PATCH v3] IO: Intelligent device lookup on bus Sasha Levin
@ 2011-07-27 11:35 ` Avi Kivity
2011-07-27 12:01 ` Sasha Levin
0 siblings, 1 reply; 4+ messages in thread
From: Avi Kivity @ 2011-07-27 11:35 UTC (permalink / raw)
To: Sasha Levin; +Cc: kvm, Marcelo Tosatti
On 07/24/2011 09:15 AM, Sasha Levin wrote:
> Currently the method of dealing with an IO operation on a bus (PIO/MMIO)
> is to call the read or write callback for each device registered
> on the bus until we find a device which handles it.
>
> Since the number of devices on a bus can be significant due to ioeventfds
> and coalesced MMIO zones, this leads to a lot of overhead on each IO
> operation.
>
> Instead of registering devices, we now register ranges which points to
> a device. Lookup is done using an efficient bsearch instead of a linear
> search.
>
> Performance test was conducted by comparing exit count per second with
> 200 ioeventfds created on one byte and the guest is trying to access a
> different byte continuously (triggering usermode exits).
> Before the patch the guest has achieved 259k exits per second, after the
> patch the guest does 274k exits per second.
>
> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> index efad723..094e057 100644
> --- a/arch/x86/kvm/i8254.c
> +++ b/arch/x86/kvm/i8254.c
> @@ -713,14 +713,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
> kvm_register_irq_mask_notifier(kvm, 0,&pit->mask_notifier);
>
> kvm_iodevice_init(&pit->dev,&pit_dev_ops);
> - ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,&pit->dev);
> + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, KVM_PIT_MEM_LENGTH,&pit->dev);
Long line.
>
> -static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
> +static inline struct kvm_pic *to_pic(struct kvm_io_device *dev, gpa_t addr)
> {
> - return container_of(dev, struct kvm_pic, dev);
> + switch (addr) {
> + case 0x20:
> + case 0x21:
> + return container_of(dev, struct kvm_pic, dev_master);
> + case 0xa0:
> + case 0xa1:
> + return container_of(dev, struct kvm_pic, dev_slave);
> + case 0x4d0:
> + case 0x4d1:
> + return container_of(dev, struct kvm_pic, dev_eclr);
> + }
> +
> + return NULL;
> }
Somewhat ugly. I think
int picdev_write_master(...)
{
return pcidev_write(container_of(...), ...);
}
is nicer, no?
> @@ -560,16 +572,36 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
> /*
> * Initialize PIO device
> */
> - kvm_iodevice_init(&s->dev,&picdev_ops);
> + kvm_iodevice_init(&s->dev_master,&picdev_ops);
> + kvm_iodevice_init(&s->dev_slave,&picdev_ops);
> + kvm_iodevice_init(&s->dev_eclr,&picdev_ops);
> mutex_lock(&kvm->slots_lock);
> - ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,&s->dev);
> + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2,&s->dev_master);
> + if (ret< 0)
> + goto fail_unlock;
> +
> + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2,&s->dev_slave);
> + if (ret< 0)
> + goto fail_unlock;
> +
> + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2,&s->dev_eclr);
> + if (ret< 0)
> + goto fail_unlock;
> +
> mutex_unlock(&kvm->slots_lock);
> - if (ret< 0) {
> - kfree(s);
> - return NULL;
> - }
>
> return s;
> +
> +fail_unlock:
> +
> + mutex_unlock(&kvm->slots_lock);
> + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_master);
> + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_slave);
> + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_eclr);
> +
> + kfree(s);
> +
> + return NULL;
> }
You're unregistering devices that were never registered. It may work
now, but it's fragile.
> if (ret< 0)
> goto out_free_dev;
> list_add_tail(&dev->list,&kvm->coalesced_zones);
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 73358d2..f59c1e8 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>
> kvm_iodevice_init(&p->dev,&ioeventfd_ops);
>
> - ret = kvm_io_bus_register_dev(kvm, bus_idx,&p->dev);
> + ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
> + &p->dev);
Should this be p->length or 1?
> #include<asm/processor.h>
> #include<asm/io.h>
> @@ -2391,24 +2393,94 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
> int i;
>
> for (i = 0; i< bus->dev_count; i++) {
> - struct kvm_io_device *pos = bus->devs[i];
> + struct kvm_io_device *pos = bus->range[i].dev;
>
This will call the destructor three times for the PIC. Is this safe?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v3] IO: Intelligent device lookup on bus
2011-07-27 11:35 ` Avi Kivity
@ 2011-07-27 12:01 ` Sasha Levin
2011-07-27 12:37 ` Avi Kivity
0 siblings, 1 reply; 4+ messages in thread
From: Sasha Levin @ 2011-07-27 12:01 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, Marcelo Tosatti
On Wed, 2011-07-27 at 14:35 +0300, Avi Kivity wrote:
> On 07/24/2011 09:15 AM, Sasha Levin wrote:
> > Currently the method of dealing with an IO operation on a bus (PIO/MMIO)
> > is to call the read or write callback for each device registered
> > on the bus until we find a device which handles it.
> >
> > Since the number of devices on a bus can be significant due to ioeventfds
> > and coalesced MMIO zones, this leads to a lot of overhead on each IO
> > operation.
> >
> > Instead of registering devices, we now register ranges which points to
> > a device. Lookup is done using an efficient bsearch instead of a linear
> > search.
> >
> > Performance test was conducted by comparing exit count per second with
> > 200 ioeventfds created on one byte and the guest is trying to access a
> > different byte continuously (triggering usermode exits).
> > Before the patch the guest has achieved 259k exits per second, after the
> > patch the guest does 274k exits per second.
> >
> > diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> > index efad723..094e057 100644
> > --- a/arch/x86/kvm/i8254.c
> > +++ b/arch/x86/kvm/i8254.c
> > @@ -713,14 +713,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
> > kvm_register_irq_mask_notifier(kvm, 0,&pit->mask_notifier);
> >
> > kvm_iodevice_init(&pit->dev,&pit_dev_ops);
> > - ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,&pit->dev);
> > + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, KVM_PIT_MEM_LENGTH,&pit->dev);
>
> Long line.
>
Will fix.
> >
> > -static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
> > +static inline struct kvm_pic *to_pic(struct kvm_io_device *dev, gpa_t addr)
> > {
> > - return container_of(dev, struct kvm_pic, dev);
> > + switch (addr) {
> > + case 0x20:
> > + case 0x21:
> > + return container_of(dev, struct kvm_pic, dev_master);
> > + case 0xa0:
> > + case 0xa1:
> > + return container_of(dev, struct kvm_pic, dev_slave);
> > + case 0x4d0:
> > + case 0x4d1:
> > + return container_of(dev, struct kvm_pic, dev_eclr);
> > + }
> > +
> > + return NULL;
> > }
>
> Somewhat ugly. I think
>
> int picdev_write_master(...)
> {
> return pcidev_write(container_of(...), ...);
> }
>
> is nicer, no?
It would mean we need a total of 6 wrappers for master, slave and eclr
instead of this switch, if that sounds ok I'll change it.
>
> > @@ -560,16 +572,36 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
> > /*
> > * Initialize PIO device
> > */
> > - kvm_iodevice_init(&s->dev,&picdev_ops);
> > + kvm_iodevice_init(&s->dev_master,&picdev_ops);
> > + kvm_iodevice_init(&s->dev_slave,&picdev_ops);
> > + kvm_iodevice_init(&s->dev_eclr,&picdev_ops);
> > mutex_lock(&kvm->slots_lock);
> > - ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,&s->dev);
> > + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2,&s->dev_master);
> > + if (ret< 0)
> > + goto fail_unlock;
> > +
> > + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2,&s->dev_slave);
> > + if (ret< 0)
> > + goto fail_unlock;
> > +
> > + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2,&s->dev_eclr);
> > + if (ret< 0)
> > + goto fail_unlock;
> > +
> > mutex_unlock(&kvm->slots_lock);
> > - if (ret< 0) {
> > - kfree(s);
> > - return NULL;
> > - }
> >
> > return s;
> > +
> > +fail_unlock:
> > +
> > + mutex_unlock(&kvm->slots_lock);
> > + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_master);
> > + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_slave);
> > + kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_eclr);
> > +
> > + kfree(s);
> > +
> > + return NULL;
> > }
>
> You're unregistering devices that were never registered. It may work
> now, but it's fragile.
I'll fix that.
>
>
> > if (ret< 0)
> > goto out_free_dev;
> > list_add_tail(&dev->list,&kvm->coalesced_zones);
> > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> > index 73358d2..f59c1e8 100644
> > --- a/virt/kvm/eventfd.c
> > +++ b/virt/kvm/eventfd.c
> > @@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> >
> > kvm_iodevice_init(&p->dev,&ioeventfd_ops);
> >
> > - ret = kvm_io_bus_register_dev(kvm, bus_idx,&p->dev);
> > + ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
> > + &p->dev);
>
> Should this be p->length or 1?
We register p->length since when we process a write, the operation
should be fully contained within the IO space of the device.
We verify that the write happens on the first byte within ioeventfd
write handler.
>
> > #include<asm/processor.h>
> > #include<asm/io.h>
> > @@ -2391,24 +2393,94 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
> > int i;
> >
> > for (i = 0; i< bus->dev_count; i++) {
> > - struct kvm_io_device *pos = bus->devs[i];
> > + struct kvm_io_device *pos = bus->range[i].dev;
> >
>
> This will call the destructor three times for the PIC. Is this safe?
PIC doesn't have a destructor for devices, the code above just does
nothing for PIC devices.
--
Sasha.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v3] IO: Intelligent device lookup on bus
2011-07-27 12:01 ` Sasha Levin
@ 2011-07-27 12:37 ` Avi Kivity
0 siblings, 0 replies; 4+ messages in thread
From: Avi Kivity @ 2011-07-27 12:37 UTC (permalink / raw)
To: Sasha Levin; +Cc: kvm, Marcelo Tosatti
On 07/27/2011 03:01 PM, Sasha Levin wrote:
> > >
> > > -static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
> > > +static inline struct kvm_pic *to_pic(struct kvm_io_device *dev, gpa_t addr)
> > > {
> > > - return container_of(dev, struct kvm_pic, dev);
> > > + switch (addr) {
> > > + case 0x20:
> > > + case 0x21:
> > > + return container_of(dev, struct kvm_pic, dev_master);
> > > + case 0xa0:
> > > + case 0xa1:
> > > + return container_of(dev, struct kvm_pic, dev_slave);
> > > + case 0x4d0:
> > > + case 0x4d1:
> > > + return container_of(dev, struct kvm_pic, dev_eclr);
> > > + }
> > > +
> > > + return NULL;
> > > }
> >
> > Somewhat ugly. I think
> >
> > int picdev_write_master(...)
> > {
> > return pcidev_write(container_of(...), ...);
> > }
> >
> > is nicer, no?
>
> It would mean we need a total of 6 wrappers for master, slave and eclr
> instead of this switch, if that sounds ok I'll change it.
IMO, they're better than the switch.
> >
> >
> > > if (ret< 0)
> > > goto out_free_dev;
> > > list_add_tail(&dev->list,&kvm->coalesced_zones);
> > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> > > index 73358d2..f59c1e8 100644
> > > --- a/virt/kvm/eventfd.c
> > > +++ b/virt/kvm/eventfd.c
> > > @@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> > >
> > > kvm_iodevice_init(&p->dev,&ioeventfd_ops);
> > >
> > > - ret = kvm_io_bus_register_dev(kvm, bus_idx,&p->dev);
> > > + ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
> > > + &p->dev);
> >
> > Should this be p->length or 1?
>
> We register p->length since when we process a write, the operation
> should be fully contained within the IO space of the device.
>
> We verify that the write happens on the first byte within ioeventfd
> write handler.
Ok.
> >
> > > #include<asm/processor.h>
> > > #include<asm/io.h>
> > > @@ -2391,24 +2393,94 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
> > > int i;
> > >
> > > for (i = 0; i< bus->dev_count; i++) {
> > > - struct kvm_io_device *pos = bus->devs[i];
> > > + struct kvm_io_device *pos = bus->range[i].dev;
> > >
> >
> > This will call the destructor three times for the PIC. Is this safe?
>
> PIC doesn't have a destructor for devices, the code above just does
> nothing for PIC devices.
Ok.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2011-07-27 12:37 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-07-24 6:15 [PATCH v3] IO: Intelligent device lookup on bus Sasha Levin
2011-07-27 11:35 ` Avi Kivity
2011-07-27 12:01 ` Sasha Levin
2011-07-27 12:37 ` Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox