public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] IO: Intelligent device lookup on bus
@ 2011-07-24  6:15 Sasha Levin
  2011-07-27 11:35 ` Avi Kivity
  0 siblings, 1 reply; 4+ messages in thread
From: Sasha Levin @ 2011-07-24  6:15 UTC (permalink / raw)
  To: kvm; +Cc: Sasha Levin, Avi Kivity, Marcelo Tosatti

Currently the method of dealing with an IO operation on a bus (PIO/MMIO)
is to call the read or write callback for each device registered
on the bus until we find a device which handles it.

Since the number of devices on a bus can be significant due to ioeventfds
and coalesced MMIO zones, this leads to a lot of overhead on each IO
operation.

Instead of registering devices, we now register ranges which points to
a device. Lookup is done using an efficient bsearch instead of a linear
search.

Performance test was conducted by comparing exit count per second with
200 ioeventfds created on one byte and the guest is trying to access a
different byte continuously (triggering usermode exits).
Before the patch the guest has achieved 259k exits per second, after the
patch the guest does 274k exits per second.

Changes in V3:
 - Small fix in error path of i8259.

Changes in V2:
 - Only register a device once.
 - Support for multiple devices using same range.
 - Added performance benchmark.
 - Using bsearch() and sort() found in /lib/.

Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
---
 arch/x86/kvm/i8254.c      |    5 +-
 arch/x86/kvm/i8259.c      |   56 ++++++++++++++++++----
 arch/x86/kvm/irq.h        |    4 +-
 arch/x86/kvm/x86.c        |    6 ++-
 include/linux/kvm_host.h  |   18 ++++----
 virt/kvm/coalesced_mmio.c |    3 +-
 virt/kvm/eventfd.c        |    3 +-
 virt/kvm/ioapic.c         |    3 +-
 virt/kvm/kvm_main.c       |  112 ++++++++++++++++++++++++++++++++++++++++-----
 9 files changed, 171 insertions(+), 39 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index efad723..094e057 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -713,14 +713,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
 
 	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
-	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, KVM_PIT_MEM_LENGTH, &pit->dev);
 	if (ret < 0)
 		goto fail;
 
 	if (flags & KVM_PIT_SPEAKER_DUMMY) {
 		kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
 		ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
-						&pit->speaker_dev);
+                                              KVM_SPEAKER_BASE_ADDRESS, 4,
+					      &pit->speaker_dev);
 		if (ret < 0)
 			goto fail_unregister;
 	}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 19fe855..9e18698 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -459,15 +459,27 @@ static int picdev_in_range(gpa_t addr)
 	}
 }
 
-static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
+static inline struct kvm_pic *to_pic(struct kvm_io_device *dev, gpa_t addr)
 {
-	return container_of(dev, struct kvm_pic, dev);
+	switch (addr) {
+	case 0x20:
+	case 0x21:
+		return container_of(dev, struct kvm_pic, dev_master);
+	case 0xa0:
+	case 0xa1:
+		return container_of(dev, struct kvm_pic, dev_slave);
+	case 0x4d0:
+	case 0x4d1:
+		return container_of(dev, struct kvm_pic, dev_eclr);
+	}
+
+	return NULL;
 }
 
 static int picdev_write(struct kvm_io_device *this,
 			 gpa_t addr, int len, const void *val)
 {
-	struct kvm_pic *s = to_pic(this);
+	struct kvm_pic *s = to_pic(this, addr);
 	unsigned char data = *(unsigned char *)val;
 	if (!picdev_in_range(addr))
 		return -EOPNOTSUPP;
@@ -497,7 +509,7 @@ static int picdev_write(struct kvm_io_device *this,
 static int picdev_read(struct kvm_io_device *this,
 		       gpa_t addr, int len, void *val)
 {
-	struct kvm_pic *s = to_pic(this);
+	struct kvm_pic *s = to_pic(this, addr);
 	unsigned char data = 0;
 	if (!picdev_in_range(addr))
 		return -EOPNOTSUPP;
@@ -560,16 +572,36 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 	/*
 	 * Initialize PIO device
 	 */
-	kvm_iodevice_init(&s->dev, &picdev_ops);
+	kvm_iodevice_init(&s->dev_master, &picdev_ops);
+	kvm_iodevice_init(&s->dev_slave, &picdev_ops);
+	kvm_iodevice_init(&s->dev_eclr, &picdev_ops);
 	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2, &s->dev_master);
+	if (ret < 0)
+		goto fail_unlock;
+
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave);
+	if (ret < 0)
+		goto fail_unlock;
+
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
+	if (ret < 0)
+		goto fail_unlock;
+
 	mutex_unlock(&kvm->slots_lock);
-	if (ret < 0) {
-		kfree(s);
-		return NULL;
-	}
 
 	return s;
+
+fail_unlock:
+
+	mutex_unlock(&kvm->slots_lock);
+	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master);
+	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave);
+	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_eclr);
+
+	kfree(s);
+
+	return NULL;
 }
 
 void kvm_destroy_pic(struct kvm *kvm)
@@ -577,7 +609,9 @@ void kvm_destroy_pic(struct kvm *kvm)
 	struct kvm_pic *vpic = kvm->arch.vpic;
 
 	if (vpic) {
-		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
+		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master);
+		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
+		kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
 		kvm->arch.vpic = NULL;
 		kfree(vpic);
 	}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 53e2d08..2086f2b 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -66,7 +66,9 @@ struct kvm_pic {
 	struct kvm *kvm;
 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
 	int output;		/* intr from master PIC */
-	struct kvm_io_device dev;
+	struct kvm_io_device dev_master;
+	struct kvm_io_device dev_slave;
+	struct kvm_io_device dev_eclr;
 	void (*ack_notifier)(void *opaque, int irq);
 	unsigned long irq_states[16];
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e80f0d7..4cea9cf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3561,7 +3561,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			if (r) {
 				mutex_lock(&kvm->slots_lock);
 				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-							  &vpic->dev);
+							  &vpic->dev_master);
+				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
+							  &vpic->dev_slave);
+				kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
+							  &vpic->dev_eclr);
 				mutex_unlock(&kvm->slots_lock);
 				kfree(vpic);
 				goto create_irqchip_unlock;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ff4d406..d0e42f3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -55,16 +55,16 @@ struct kvm;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
-/*
- * It would be nice to use something smarter than a linear search, TBD...
- * Thankfully we dont expect many devices to register (famous last words :),
- * so until then it will suffice.  At least its abstracted so we can change
- * in one place.
- */
+struct kvm_io_range {
+	gpa_t addr;
+	int len;
+	struct kvm_io_device *dev;
+};
+
 struct kvm_io_bus {
 	int                   dev_count;
 #define NR_IOBUS_DEVS 300
-	struct kvm_io_device *devs[NR_IOBUS_DEVS];
+	struct kvm_io_range range[NR_IOBUS_DEVS];
 };
 
 enum kvm_bus {
@@ -77,8 +77,8 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val);
 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
 		    void *val);
-int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-			    struct kvm_io_device *dev);
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+			    int len, struct kvm_io_device *dev);
 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 			      struct kvm_io_device *dev);
 
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 2316ec1..a6ec206 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -141,7 +141,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 	dev->zone = *zone;
 
 	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr,
+				      zone->size, &dev->dev);
 	if (ret < 0)
 		goto out_free_dev;
 	list_add_tail(&dev->list, &kvm->coalesced_zones);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 73358d2..f59c1e8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	kvm_iodevice_init(&p->dev, &ioeventfd_ops);
 
-	ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
+	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
+				      &p->dev);
 	if (ret < 0)
 		goto unlock_fail;
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 8df1ca1..86aeea1 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -394,7 +394,8 @@ int kvm_ioapic_init(struct kvm *kvm)
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
 	ioapic->kvm = kvm;
 	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
+                                      IOAPIC_MEM_LENGTH, &ioapic->dev);
 	mutex_unlock(&kvm->slots_lock);
 	if (ret < 0) {
 		kvm->arch.vioapic = NULL;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index aefdda3..0c228f4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,8 @@
 #include <linux/srcu.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/bsearch.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -2391,24 +2393,94 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 	int i;
 
 	for (i = 0; i < bus->dev_count; i++) {
-		struct kvm_io_device *pos = bus->devs[i];
+		struct kvm_io_device *pos = bus->range[i].dev;
 
 		kvm_iodevice_destructor(pos);
 	}
 	kfree(bus);
 }
 
+int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
+{
+	const struct kvm_io_range *r1 = p1;
+	const struct kvm_io_range *r2 = p2;
+
+	if (r1->addr < r2->addr)
+		return -1;
+	if (r1->addr + r1->len > r2->addr + r2->len)
+		return 1;
+	return 0;
+}
+
+int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
+			  gpa_t addr, int len)
+{
+	int i;
+
+	if (bus->dev_count == NR_IOBUS_DEVS)
+		return -ENOSPC;
+
+	bus->range[bus->dev_count++] = (struct kvm_io_range) {
+		.addr = addr,
+		.len = len,
+		.dev = dev,
+	};
+
+	sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
+		kvm_io_bus_sort_cmp, NULL);
+
+	return 0;
+}
+
+int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
+			     gpa_t addr, int len)
+{
+	struct kvm_io_range *range, key;
+	int off;
+
+	key = (struct kvm_io_range) {
+		.addr = addr,
+		.len = len,
+	};
+
+	range = bsearch(&key, bus->range, bus->dev_count,
+			sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp);
+	if (range == NULL)
+		return -ENOENT;
+
+	off = range - bus->range;
+
+	while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0)
+		off--;
+
+	return off;
+}
+
 /* kvm_io_bus_write - called under kvm->slots_lock */
 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val)
 {
-	int i;
+	int idx;
 	struct kvm_io_bus *bus;
+	struct kvm_io_range range;
+
+	range = (struct kvm_io_range) {
+		.addr = addr,
+		.len = len,
+	};
 
 	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-	for (i = 0; i < bus->dev_count; i++)
-		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
+	idx = kvm_io_bus_get_first_dev(bus, addr, len);
+	if (idx < 0)
+		return -EOPNOTSUPP;
+
+	while (idx < bus->dev_count &&
+		kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+		if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val))
 			return 0;
+		idx++;
+	}
+
 	return -EOPNOTSUPP;
 }
 
@@ -2416,19 +2488,33 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		    int len, void *val)
 {
-	int i;
+	int idx;
 	struct kvm_io_bus *bus;
+	struct kvm_io_range range;
+
+	range = (struct kvm_io_range) {
+		.addr = addr,
+		.len = len,
+	};
 
 	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-	for (i = 0; i < bus->dev_count; i++)
-		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
+	idx = kvm_io_bus_get_first_dev(bus, addr, len);
+	if (idx < 0)
+		return -EOPNOTSUPP;
+
+	while (idx < bus->dev_count &&
+		kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+		if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val))
 			return 0;
+		idx++;
+	}
+
 	return -EOPNOTSUPP;
 }
 
 /* Caller must hold slots_lock. */
-int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-			    struct kvm_io_device *dev)
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+			    int len, struct kvm_io_device *dev)
 {
 	struct kvm_io_bus *new_bus, *bus;
 
@@ -2440,7 +2526,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 	if (!new_bus)
 		return -ENOMEM;
 	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
-	new_bus->devs[new_bus->dev_count++] = dev;
+	kvm_io_bus_insert_dev(new_bus, dev, addr, len);
 	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
 	synchronize_srcu_expedited(&kvm->srcu);
 	kfree(bus);
@@ -2464,9 +2550,11 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 
 	r = -ENOENT;
 	for (i = 0; i < new_bus->dev_count; i++)
-		if (new_bus->devs[i] == dev) {
+		if (new_bus->range[i].dev == dev) {
 			r = 0;
-			new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
+			new_bus->range[i] = new_bus->range[--new_bus->dev_count];
+			sort(new_bus->range, new_bus->dev_count, sizeof(struct kvm_io_range),
+				kvm_io_bus_sort_cmp, NULL);
 			break;
 		}
 
-- 
1.7.6


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] IO: Intelligent device lookup on bus
  2011-07-24  6:15 [PATCH v3] IO: Intelligent device lookup on bus Sasha Levin
@ 2011-07-27 11:35 ` Avi Kivity
  2011-07-27 12:01   ` Sasha Levin
  0 siblings, 1 reply; 4+ messages in thread
From: Avi Kivity @ 2011-07-27 11:35 UTC (permalink / raw)
  To: Sasha Levin; +Cc: kvm, Marcelo Tosatti

On 07/24/2011 09:15 AM, Sasha Levin wrote:
> Currently the method of dealing with an IO operation on a bus (PIO/MMIO)
> is to call the read or write callback for each device registered
> on the bus until we find a device which handles it.
>
> Since the number of devices on a bus can be significant due to ioeventfds
> and coalesced MMIO zones, this leads to a lot of overhead on each IO
> operation.
>
> Instead of registering devices, we now register ranges which points to
> a device. Lookup is done using an efficient bsearch instead of a linear
> search.
>
> Performance test was conducted by comparing exit count per second with
> 200 ioeventfds created on one byte and the guest is trying to access a
> different byte continuously (triggering usermode exits).
> Before the patch the guest has achieved 259k exits per second, after the
> patch the guest does 274k exits per second.
>
> diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> index efad723..094e057 100644
> --- a/arch/x86/kvm/i8254.c
> +++ b/arch/x86/kvm/i8254.c
> @@ -713,14 +713,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
>   	kvm_register_irq_mask_notifier(kvm, 0,&pit->mask_notifier);
>
>   	kvm_iodevice_init(&pit->dev,&pit_dev_ops);
> -	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,&pit->dev);
> +	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, KVM_PIT_MEM_LENGTH,&pit->dev);

Long line.

>
> -static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
> +static inline struct kvm_pic *to_pic(struct kvm_io_device *dev, gpa_t addr)
>   {
> -	return container_of(dev, struct kvm_pic, dev);
> +	switch (addr) {
> +	case 0x20:
> +	case 0x21:
> +		return container_of(dev, struct kvm_pic, dev_master);
> +	case 0xa0:
> +	case 0xa1:
> +		return container_of(dev, struct kvm_pic, dev_slave);
> +	case 0x4d0:
> +	case 0x4d1:
> +		return container_of(dev, struct kvm_pic, dev_eclr);
> +	}
> +
> +	return NULL;
>   }

Somewhat ugly.  I think

     int picdev_write_master(...)
    {
         return pcidev_write(container_of(...), ...);
     }

is nicer, no?

> @@ -560,16 +572,36 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
>   	/*
>   	 * Initialize PIO device
>   	 */
> -	kvm_iodevice_init(&s->dev,&picdev_ops);
> +	kvm_iodevice_init(&s->dev_master,&picdev_ops);
> +	kvm_iodevice_init(&s->dev_slave,&picdev_ops);
> +	kvm_iodevice_init(&s->dev_eclr,&picdev_ops);
>   	mutex_lock(&kvm->slots_lock);
> -	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,&s->dev);
> +	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2,&s->dev_master);
> +	if (ret<  0)
> +		goto fail_unlock;
> +
> +	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2,&s->dev_slave);
> +	if (ret<  0)
> +		goto fail_unlock;
> +
> +	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2,&s->dev_eclr);
> +	if (ret<  0)
> +		goto fail_unlock;
> +
>   	mutex_unlock(&kvm->slots_lock);
> -	if (ret<  0) {
> -		kfree(s);
> -		return NULL;
> -	}
>
>   	return s;
> +
> +fail_unlock:
> +
> +	mutex_unlock(&kvm->slots_lock);
> +	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_master);
> +	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_slave);
> +	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_eclr);
> +
> +	kfree(s);
> +
> +	return NULL;
>   }

You're unregistering devices that were never registered.  It may work 
now, but it's fragile.


>   	if (ret<  0)
>   		goto out_free_dev;
>   	list_add_tail(&dev->list,&kvm->coalesced_zones);
> diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> index 73358d2..f59c1e8 100644
> --- a/virt/kvm/eventfd.c
> +++ b/virt/kvm/eventfd.c
> @@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
>
>   	kvm_iodevice_init(&p->dev,&ioeventfd_ops);
>
> -	ret = kvm_io_bus_register_dev(kvm, bus_idx,&p->dev);
> +	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
> +				&p->dev);

Should this be p->length or 1?

>   #include<asm/processor.h>
>   #include<asm/io.h>
> @@ -2391,24 +2393,94 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
>   	int i;
>
>   	for (i = 0; i<  bus->dev_count; i++) {
> -		struct kvm_io_device *pos = bus->devs[i];
> +		struct kvm_io_device *pos = bus->range[i].dev;
>

This will call the destructor three times for the PIC.  Is this safe?


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] IO: Intelligent device lookup on bus
  2011-07-27 11:35 ` Avi Kivity
@ 2011-07-27 12:01   ` Sasha Levin
  2011-07-27 12:37     ` Avi Kivity
  0 siblings, 1 reply; 4+ messages in thread
From: Sasha Levin @ 2011-07-27 12:01 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm, Marcelo Tosatti

On Wed, 2011-07-27 at 14:35 +0300, Avi Kivity wrote:
> On 07/24/2011 09:15 AM, Sasha Levin wrote:
> > Currently the method of dealing with an IO operation on a bus (PIO/MMIO)
> > is to call the read or write callback for each device registered
> > on the bus until we find a device which handles it.
> >
> > Since the number of devices on a bus can be significant due to ioeventfds
> > and coalesced MMIO zones, this leads to a lot of overhead on each IO
> > operation.
> >
> > Instead of registering devices, we now register ranges which points to
> > a device. Lookup is done using an efficient bsearch instead of a linear
> > search.
> >
> > Performance test was conducted by comparing exit count per second with
> > 200 ioeventfds created on one byte and the guest is trying to access a
> > different byte continuously (triggering usermode exits).
> > Before the patch the guest has achieved 259k exits per second, after the
> > patch the guest does 274k exits per second.
> >
> > diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
> > index efad723..094e057 100644
> > --- a/arch/x86/kvm/i8254.c
> > +++ b/arch/x86/kvm/i8254.c
> > @@ -713,14 +713,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
> >   	kvm_register_irq_mask_notifier(kvm, 0,&pit->mask_notifier);
> >
> >   	kvm_iodevice_init(&pit->dev,&pit_dev_ops);
> > -	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,&pit->dev);
> > +	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, KVM_PIT_MEM_LENGTH,&pit->dev);
> 
> Long line.
> 

Will fix.

> >
> > -static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
> > +static inline struct kvm_pic *to_pic(struct kvm_io_device *dev, gpa_t addr)
> >   {
> > -	return container_of(dev, struct kvm_pic, dev);
> > +	switch (addr) {
> > +	case 0x20:
> > +	case 0x21:
> > +		return container_of(dev, struct kvm_pic, dev_master);
> > +	case 0xa0:
> > +	case 0xa1:
> > +		return container_of(dev, struct kvm_pic, dev_slave);
> > +	case 0x4d0:
> > +	case 0x4d1:
> > +		return container_of(dev, struct kvm_pic, dev_eclr);
> > +	}
> > +
> > +	return NULL;
> >   }
> 
> Somewhat ugly.  I think
> 
>      int picdev_write_master(...)
>     {
>          return pcidev_write(container_of(...), ...);
>      }
> 
> is nicer, no?

It would mean we need a total of 6 wrappers for master, slave and eclr
instead of this switch, if that sounds ok I'll change it.

> 
> > @@ -560,16 +572,36 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
> >   	/*
> >   	 * Initialize PIO device
> >   	 */
> > -	kvm_iodevice_init(&s->dev,&picdev_ops);
> > +	kvm_iodevice_init(&s->dev_master,&picdev_ops);
> > +	kvm_iodevice_init(&s->dev_slave,&picdev_ops);
> > +	kvm_iodevice_init(&s->dev_eclr,&picdev_ops);
> >   	mutex_lock(&kvm->slots_lock);
> > -	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,&s->dev);
> > +	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2,&s->dev_master);
> > +	if (ret<  0)
> > +		goto fail_unlock;
> > +
> > +	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2,&s->dev_slave);
> > +	if (ret<  0)
> > +		goto fail_unlock;
> > +
> > +	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2,&s->dev_eclr);
> > +	if (ret<  0)
> > +		goto fail_unlock;
> > +
> >   	mutex_unlock(&kvm->slots_lock);
> > -	if (ret<  0) {
> > -		kfree(s);
> > -		return NULL;
> > -	}
> >
> >   	return s;
> > +
> > +fail_unlock:
> > +
> > +	mutex_unlock(&kvm->slots_lock);
> > +	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_master);
> > +	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_slave);
> > +	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,&s->dev_eclr);
> > +
> > +	kfree(s);
> > +
> > +	return NULL;
> >   }
> 
> You're unregistering devices that were never registered.  It may work 
> now, but it's fragile.

I'll fix that.

> 
> 
> >   	if (ret<  0)
> >   		goto out_free_dev;
> >   	list_add_tail(&dev->list,&kvm->coalesced_zones);
> > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> > index 73358d2..f59c1e8 100644
> > --- a/virt/kvm/eventfd.c
> > +++ b/virt/kvm/eventfd.c
> > @@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> >
> >   	kvm_iodevice_init(&p->dev,&ioeventfd_ops);
> >
> > -	ret = kvm_io_bus_register_dev(kvm, bus_idx,&p->dev);
> > +	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
> > +				&p->dev);
> 
> Should this be p->length or 1?

We register p->length since when we process a write, the operation
should be fully contained within the IO space of the device.

We verify that the write happens on the first byte within ioeventfd
write handler.

> 
> >   #include<asm/processor.h>
> >   #include<asm/io.h>
> > @@ -2391,24 +2393,94 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
> >   	int i;
> >
> >   	for (i = 0; i<  bus->dev_count; i++) {
> > -		struct kvm_io_device *pos = bus->devs[i];
> > +		struct kvm_io_device *pos = bus->range[i].dev;
> >
> 
> This will call the destructor three times for the PIC.  Is this safe?

PIC doesn't have a destructor for devices, the code above just does
nothing for PIC devices.

-- 

Sasha.


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] IO: Intelligent device lookup on bus
  2011-07-27 12:01   ` Sasha Levin
@ 2011-07-27 12:37     ` Avi Kivity
  0 siblings, 0 replies; 4+ messages in thread
From: Avi Kivity @ 2011-07-27 12:37 UTC (permalink / raw)
  To: Sasha Levin; +Cc: kvm, Marcelo Tosatti

On 07/27/2011 03:01 PM, Sasha Levin wrote:
> >  >
> >  >  -static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
> >  >  +static inline struct kvm_pic *to_pic(struct kvm_io_device *dev, gpa_t addr)
> >  >    {
> >  >  -	return container_of(dev, struct kvm_pic, dev);
> >  >  +	switch (addr) {
> >  >  +	case 0x20:
> >  >  +	case 0x21:
> >  >  +		return container_of(dev, struct kvm_pic, dev_master);
> >  >  +	case 0xa0:
> >  >  +	case 0xa1:
> >  >  +		return container_of(dev, struct kvm_pic, dev_slave);
> >  >  +	case 0x4d0:
> >  >  +	case 0x4d1:
> >  >  +		return container_of(dev, struct kvm_pic, dev_eclr);
> >  >  +	}
> >  >  +
> >  >  +	return NULL;
> >  >    }
> >
> >  Somewhat ugly.  I think
> >
> >       int picdev_write_master(...)
> >      {
> >           return pcidev_write(container_of(...), ...);
> >       }
> >
> >  is nicer, no?
>
> It would mean we need a total of 6 wrappers for master, slave and eclr
> instead of this switch, if that sounds ok I'll change it.

IMO, they're better than the switch.

> >
> >
> >  >    	if (ret<   0)
> >  >    		goto out_free_dev;
> >  >    	list_add_tail(&dev->list,&kvm->coalesced_zones);
> >  >  diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
> >  >  index 73358d2..f59c1e8 100644
> >  >  --- a/virt/kvm/eventfd.c
> >  >  +++ b/virt/kvm/eventfd.c
> >  >  @@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
> >  >
> >  >    	kvm_iodevice_init(&p->dev,&ioeventfd_ops);
> >  >
> >  >  -	ret = kvm_io_bus_register_dev(kvm, bus_idx,&p->dev);
> >  >  +	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
> >  >  +				&p->dev);
> >
> >  Should this be p->length or 1?
>
> We register p->length since when we process a write, the operation
> should be fully contained within the IO space of the device.
>
> We verify that the write happens on the first byte within ioeventfd
> write handler.

Ok.

> >
> >  >    #include<asm/processor.h>
> >  >    #include<asm/io.h>
> >  >  @@ -2391,24 +2393,94 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
> >  >    	int i;
> >  >
> >  >    	for (i = 0; i<   bus->dev_count; i++) {
> >  >  -		struct kvm_io_device *pos = bus->devs[i];
> >  >  +		struct kvm_io_device *pos = bus->range[i].dev;
> >  >
> >
> >  This will call the destructor three times for the PIC.  Is this safe?
>
> PIC doesn't have a destructor for devices, the code above just does
> nothing for PIC devices.

Ok.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-07-27 12:37 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-07-24  6:15 [PATCH v3] IO: Intelligent device lookup on bus Sasha Levin
2011-07-27 11:35 ` Avi Kivity
2011-07-27 12:01   ` Sasha Levin
2011-07-27 12:37     ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox