public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] kvm tools: Add MMIO coalescing support
@ 2011-06-03 19:51 Sasha Levin
  2011-06-04  9:38 ` Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Sasha Levin @ 2011-06-03 19:51 UTC (permalink / raw)
  To: penberg; +Cc: kvm, mingo, asias.hejun, gorcunov, prasadjoshi124, Sasha Levin

Coalescing MMIO allows us to avoid an exit every time we have a
MMIO write, instead - MMIO writes are coalesced in a ring which
can be flushed once an exit for a different reason is needed.
An MMIO exit is also triggered once the ring is full.

Coalesce all MMIO regions registered in the MMIO mapper.
Add a coalescing handler under kvm_cpu.

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
---
 tools/kvm/hw/vesa.c             |    2 +-
 tools/kvm/include/kvm/kvm-cpu.h |    2 ++
 tools/kvm/include/kvm/kvm.h     |    4 ++--
 tools/kvm/kvm-cpu.c             |   24 ++++++++++++++++++++++++
 tools/kvm/mmio.c                |   24 ++++++++++++++++++++++--
 5 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/tools/kvm/hw/vesa.c b/tools/kvm/hw/vesa.c
index b99f2de..a12c601 100644
--- a/tools/kvm/hw/vesa.c
+++ b/tools/kvm/hw/vesa.c
@@ -77,7 +77,7 @@ void vesa__init(struct kvm *kvm)
 	vesa_pci_device.bar[0]		= vesa_base_addr | PCI_BASE_ADDRESS_SPACE_IO;
 	pci__register(&vesa_pci_device, dev);
 
-	kvm__register_mmio(VESA_MEM_ADDR, VESA_MEM_SIZE, &vesa_mmio_callback);
+	kvm__register_mmio(kvm, VESA_MEM_ADDR, VESA_MEM_SIZE, &vesa_mmio_callback);
 
 	pthread_create(&thread, NULL, vesa__dovnc, kvm);
 }
diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h
index 4d99246..1eb4a52 100644
--- a/tools/kvm/include/kvm/kvm-cpu.h
+++ b/tools/kvm/include/kvm/kvm-cpu.h
@@ -24,6 +24,8 @@ struct kvm_cpu {
 
 	u8			is_running;
 	u8			paused;
+
+	struct kvm_coalesced_mmio_ring	*ring;
 };
 
 struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id);
diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
index d22a849..55551de 100644
--- a/tools/kvm/include/kvm/kvm.h
+++ b/tools/kvm/include/kvm/kvm.h
@@ -49,8 +49,8 @@ void kvm__stop_timer(struct kvm *kvm);
 void kvm__irq_line(struct kvm *kvm, int irq, int level);
 bool kvm__emulate_io(struct kvm *kvm, u16 port, void *data, int direction, int size, u32 count);
 bool kvm__emulate_mmio(struct kvm *kvm, u64 phys_addr, u8 *data, u32 len, u8 is_write);
-bool kvm__register_mmio(u64 phys_addr, u64 phys_addr_len, void (*kvm_mmio_callback_fn)(u64 addr, u8 *data, u32 len, u8 is_write));
-bool kvm__deregister_mmio(u64 phys_addr);
+bool kvm__register_mmio(struct kvm *kvm, u64 phys_addr, u64 phys_addr_len, void (*kvm_mmio_callback_fn)(u64 addr, u8 *data, u32 len, u8 is_write));
+bool kvm__deregister_mmio(struct kvm *kvm, u64 phys_addr);
 void kvm__pause(void);
 void kvm__continue(void);
 void kvm__notify_paused(void);
diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c
index be0528b..1fb1c74 100644
--- a/tools/kvm/kvm-cpu.c
+++ b/tools/kvm/kvm-cpu.c
@@ -14,6 +14,8 @@
 #include <errno.h>
 #include <stdio.h>
 
+#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE))
+
 extern __thread struct kvm_cpu *current_kvm_cpu;
 
 static inline bool is_in_protected_mode(struct kvm_cpu *vcpu)
@@ -70,6 +72,7 @@ struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
 {
 	struct kvm_cpu *vcpu;
 	int mmap_size;
+	int coalesced_offset;
 
 	vcpu		= kvm_cpu__new(kvm);
 	if (!vcpu)
@@ -89,6 +92,10 @@ struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id)
 	if (vcpu->kvm_run == MAP_FAILED)
 		die("unable to mmap vcpu fd");
 
+	coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO);
+	if (coalesced_offset)
+		vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE);
+
 	vcpu->is_running = true;
 
 	return vcpu;
@@ -395,6 +402,22 @@ static void kvm_cpu_signal_handler(int signum)
 	}
 }
 
+static void kvm_cpu__handle_coalesced_mmio(struct kvm_cpu *cpu)
+{
+	if (cpu->ring) {
+		while (cpu->ring->first != cpu->ring->last) {
+			struct kvm_coalesced_mmio *m;
+			m = &cpu->ring->coalesced_mmio[cpu->ring->first];
+			kvm__emulate_mmio(cpu->kvm,
+					m->phys_addr,
+					m->data,
+					m->len,
+					1);
+			cpu->ring->first = (cpu->ring->first + 1) % KVM_COALESCED_MMIO_MAX;
+		}
+	}
+}
+
 int kvm_cpu__start(struct kvm_cpu *cpu)
 {
 	sigset_t sigset;
@@ -462,6 +485,7 @@ int kvm_cpu__start(struct kvm_cpu *cpu)
 		default:
 			goto panic_kvm;
 		}
+		kvm_cpu__handle_coalesced_mmio(cpu);
 	}
 
 exit_kvm:
diff --git a/tools/kvm/mmio.c b/tools/kvm/mmio.c
index acd091e..64bef37 100644
--- a/tools/kvm/mmio.c
+++ b/tools/kvm/mmio.c
@@ -5,6 +5,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#include <sys/ioctl.h>
+#include <linux/kvm.h>
 #include <linux/types.h>
 #include <linux/rbtree.h>
 
@@ -53,9 +55,10 @@ static const char *to_direction(u8 is_write)
 	return "read";
 }
 
-bool kvm__register_mmio(u64 phys_addr, u64 phys_addr_len, void (*kvm_mmio_callback_fn)(u64 addr, u8 *data, u32 len, u8 is_write))
+bool kvm__register_mmio(struct kvm *kvm, u64 phys_addr, u64 phys_addr_len, void (*kvm_mmio_callback_fn)(u64 addr, u8 *data, u32 len, u8 is_write))
 {
 	struct mmio_mapping *mmio;
+	struct kvm_coalesced_mmio_zone zone;
 	int ret;
 
 	mmio = malloc(sizeof(*mmio));
@@ -67,6 +70,16 @@ bool kvm__register_mmio(u64 phys_addr, u64 phys_addr_len, void (*kvm_mmio_callba
 		.kvm_mmio_callback_fn = kvm_mmio_callback_fn,
 	};
 
+	zone = (struct kvm_coalesced_mmio_zone) {
+		.addr	= phys_addr,
+		.size	= phys_addr_len,
+	};
+	ret = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
+	if (ret < 0) {
+		free(mmio);
+		return false;
+	}
+
 	br_write_lock();
 	ret = mmio_insert(&mmio_tree, mmio);
 	br_write_unlock();
@@ -74,9 +87,10 @@ bool kvm__register_mmio(u64 phys_addr, u64 phys_addr_len, void (*kvm_mmio_callba
 	return ret;
 }
 
-bool kvm__deregister_mmio(u64 phys_addr)
+bool kvm__deregister_mmio(struct kvm *kvm, u64 phys_addr)
 {
 	struct mmio_mapping *mmio;
+	struct kvm_coalesced_mmio_zone zone;
 
 	br_write_lock();
 	mmio = mmio_search_single(&mmio_tree, phys_addr);
@@ -85,6 +99,12 @@ bool kvm__deregister_mmio(u64 phys_addr)
 		return false;
 	}
 
+	zone = (struct kvm_coalesced_mmio_zone) {
+		.addr	= phys_addr,
+		.size	= 1,
+	};
+	ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
+
 	rb_int_erase(&mmio_tree, &mmio->node);
 	br_write_unlock();
 
-- 
1.7.5.3


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-03 19:51 [PATCH] kvm tools: Add MMIO coalescing support Sasha Levin
@ 2011-06-04  9:38 ` Ingo Molnar
  2011-06-04 10:14   ` Sasha Levin
  0 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2011-06-04  9:38 UTC (permalink / raw)
  To: Sasha Levin; +Cc: penberg, kvm, asias.hejun, gorcunov, prasadjoshi124


* Sasha Levin <levinsasha928@gmail.com> wrote:

> Coalescing MMIO allows us to avoid an exit every time we have a
> MMIO write, instead - MMIO writes are coalesced in a ring which
> can be flushed once an exit for a different reason is needed.
> A MMIO exit is also trigged once the ring is full.
> 
> Coalesce all MMIO regions registered in the MMIO mapper.
> Add a coalescing handler under kvm_cpu.

Does this have any effect on latency? I.e. does the guest side 
guarantee that the pending queue will be flushed after a group of 
updates have been done?

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04  9:38 ` Ingo Molnar
@ 2011-06-04 10:14   ` Sasha Levin
  2011-06-04 10:17     ` Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Sasha Levin @ 2011-06-04 10:14 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: penberg, kvm, asias.hejun, gorcunov, prasadjoshi124

On Sat, 2011-06-04 at 11:38 +0200, Ingo Molnar wrote:
> * Sasha Levin <levinsasha928@gmail.com> wrote:
> 
> > Coalescing MMIO allows us to avoid an exit every time we have a
> > MMIO write, instead - MMIO writes are coalesced in a ring which
> > can be flushed once an exit for a different reason is needed.
> > A MMIO exit is also trigged once the ring is full.
> > 
> > Coalesce all MMIO regions registered in the MMIO mapper.
> > Add a coalescing handler under kvm_cpu.
> 
> Does this have any effect on latency? I.e. does the guest side 
> guarantee that the pending queue will be flushed after a group of 
> updates have been done?

There's nothing that detects groups of MMIO writes, but the ring size is
a bit less than PAGE_SIZE (half of it is overhead - the rest is data) and
we'll exit once the ring is full.

-- 

Sasha.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 10:14   ` Sasha Levin
@ 2011-06-04 10:17     ` Ingo Molnar
  2011-06-04 10:28       ` Sasha Levin
  0 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2011-06-04 10:17 UTC (permalink / raw)
  To: Sasha Levin; +Cc: penberg, kvm, asias.hejun, gorcunov, prasadjoshi124


* Sasha Levin <levinsasha928@gmail.com> wrote:

> On Sat, 2011-06-04 at 11:38 +0200, Ingo Molnar wrote:
> > * Sasha Levin <levinsasha928@gmail.com> wrote:
> > 
> > > Coalescing MMIO allows us to avoid an exit every time we have a
> > > MMIO write, instead - MMIO writes are coalesced in a ring which
> > > can be flushed once an exit for a different reason is needed.
> > > A MMIO exit is also trigged once the ring is full.
> > > 
> > > Coalesce all MMIO regions registered in the MMIO mapper.
> > > Add a coalescing handler under kvm_cpu.
> > 
> > Does this have any effect on latency? I.e. does the guest side 
> > guarantee that the pending queue will be flushed after a group of 
> > updates have been done?
> 
> Theres nothing that detects groups of MMIO writes, but the ring size is
> a bit less than PAGE_SIZE (half of it is overhead - rest is data) and
> we'll exit once the ring is full.

But if the page is only filled partially and if mmio is not submitted 
by the guest indefinitely (say it runs a lot of user-space code) then 
the mmio remains pending in the partial-page buffer?

If that's how it works then i *really* don't like this, this looks 
like a seriously mis-designed batching feature which might have 
improved a few server benchmarks but which will introduce random, 
hard to debug delays all around the place!

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 10:17     ` Ingo Molnar
@ 2011-06-04 10:28       ` Sasha Levin
  2011-06-04 10:35         ` Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Sasha Levin @ 2011-06-04 10:28 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: penberg, kvm, asias.hejun, gorcunov, prasadjoshi124

On Sat, 2011-06-04 at 12:17 +0200, Ingo Molnar wrote:
> * Sasha Levin <levinsasha928@gmail.com> wrote:
> 
> > On Sat, 2011-06-04 at 11:38 +0200, Ingo Molnar wrote:
> > > * Sasha Levin <levinsasha928@gmail.com> wrote:
> > > 
> > > > Coalescing MMIO allows us to avoid an exit every time we have a
> > > > MMIO write, instead - MMIO writes are coalesced in a ring which
> > > > can be flushed once an exit for a different reason is needed.
> > > > A MMIO exit is also trigged once the ring is full.
> > > > 
> > > > Coalesce all MMIO regions registered in the MMIO mapper.
> > > > Add a coalescing handler under kvm_cpu.
> > > 
> > > Does this have any effect on latency? I.e. does the guest side 
> > > guarantee that the pending queue will be flushed after a group of 
> > > updates have been done?
> > 
> > Theres nothing that detects groups of MMIO writes, but the ring size is
> > a bit less than PAGE_SIZE (half of it is overhead - rest is data) and
> > we'll exit once the ring is full.
> 
> But if the page is only filled partially and if mmio is not submitted 
> by the guest indefinitely (say it runs a lot of user-space code) then 
> the mmio remains pending in the partial-page buffer?

We flush the ring on any exit from the guest, not just MMIO exit.
But yes, from what I understand from the code - if the buffer is only
partially full and we don't take an exit, the buffer doesn't get back to
the host.

ioeventfds and such are making exits less common, so yes - it's possible
we won't have an exit in a while.

> If that's how it works then i *really* don't like this, this looks 
> like a seriously mis-designed batching feature which might have 
> improved a few server benchmarks but which will introduce random, 
> hard to debug delays all around the place!


-- 

Sasha.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 10:28       ` Sasha Levin
@ 2011-06-04 10:35         ` Ingo Molnar
  2011-06-04 10:39           ` Alexander Graf
  0 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2011-06-04 10:35 UTC (permalink / raw)
  To: Sasha Levin; +Cc: penberg, kvm, asias.hejun, gorcunov, prasadjoshi124


* Sasha Levin <levinsasha928@gmail.com> wrote:

> On Sat, 2011-06-04 at 12:17 +0200, Ingo Molnar wrote:
> > * Sasha Levin <levinsasha928@gmail.com> wrote:
> > 
> > > On Sat, 2011-06-04 at 11:38 +0200, Ingo Molnar wrote:
> > > > * Sasha Levin <levinsasha928@gmail.com> wrote:
> > > > 
> > > > > Coalescing MMIO allows us to avoid an exit every time we have a
> > > > > MMIO write, instead - MMIO writes are coalesced in a ring which
> > > > > can be flushed once an exit for a different reason is needed.
> > > > > A MMIO exit is also trigged once the ring is full.
> > > > > 
> > > > > Coalesce all MMIO regions registered in the MMIO mapper.
> > > > > Add a coalescing handler under kvm_cpu.
> > > > 
> > > > Does this have any effect on latency? I.e. does the guest side 
> > > > guarantee that the pending queue will be flushed after a group of 
> > > > updates have been done?
> > > 
> > > Theres nothing that detects groups of MMIO writes, but the ring size is
> > > a bit less than PAGE_SIZE (half of it is overhead - rest is data) and
> > > we'll exit once the ring is full.
> > 
> > But if the page is only filled partially and if mmio is not submitted 
> > by the guest indefinitely (say it runs a lot of user-space code) then 
> > the mmio remains pending in the partial-page buffer?
> 
> We flush the ring on any exit from the guest, not just MMIO exit.
> But yes, from what I understand from the code - if the buffer is only
> partially full and we don't take an exit, the buffer doesn't get back to
> the host.
> 
> ioeventfds and such are making exits less common, so yes - it's possible
> we won't have an exit in a while.
> 
> > If that's how it works then i *really* don't like this, this looks 
> > like a seriously mis-designed batching feature which might have 
> > improved a few server benchmarks but which will introduce random, 
> > hard to debug delays all around the place!

The proper way to implement batching is not to do it blindly like 
here, but to do what we do in the TLB coalescing/gather code in the 
kernel:

	gather();

	... submit individual TLB flushes ...

	flush();

That's how it should be done here too: each virtio driver that issues 
a group of MMIOs should first start batching, then issue the 
individual MMIOs and then flush them.

That can be simplified to leave out the gather() phase, i.e. just 
issue batched MMIOs and flush them before exiting the virtio (guest 
side) driver routines.

KVM_CAP_COALESCED_MMIO is an unsafe shortcut hack in its current form 
and it looks completely unsafe.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 10:35         ` Ingo Molnar
@ 2011-06-04 10:39           ` Alexander Graf
  2011-06-04 10:47             ` Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Alexander Graf @ 2011-06-04 10:39 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Sasha Levin, penberg, kvm, asias.hejun, gorcunov, prasadjoshi124


On 04.06.2011, at 12:35, Ingo Molnar wrote:

> 
> * Sasha Levin <levinsasha928@gmail.com> wrote:
> 
>> On Sat, 2011-06-04 at 12:17 +0200, Ingo Molnar wrote:
>>> * Sasha Levin <levinsasha928@gmail.com> wrote:
>>> 
>>>> On Sat, 2011-06-04 at 11:38 +0200, Ingo Molnar wrote:
>>>>> * Sasha Levin <levinsasha928@gmail.com> wrote:
>>>>> 
>>>>>> Coalescing MMIO allows us to avoid an exit every time we have a
>>>>>> MMIO write, instead - MMIO writes are coalesced in a ring which
>>>>>> can be flushed once an exit for a different reason is needed.
>>>>>> A MMIO exit is also trigged once the ring is full.
>>>>>> 
>>>>>> Coalesce all MMIO regions registered in the MMIO mapper.
>>>>>> Add a coalescing handler under kvm_cpu.
>>>>> 
>>>>> Does this have any effect on latency? I.e. does the guest side 
>>>>> guarantee that the pending queue will be flushed after a group of 
>>>>> updates have been done?
>>>> 
>>>> Theres nothing that detects groups of MMIO writes, but the ring size is
>>>> a bit less than PAGE_SIZE (half of it is overhead - rest is data) and
>>>> we'll exit once the ring is full.
>>> 
>>> But if the page is only filled partially and if mmio is not submitted 
>>> by the guest indefinitely (say it runs a lot of user-space code) then 
>>> the mmio remains pending in the partial-page buffer?
>> 
>> We flush the ring on any exit from the guest, not just MMIO exit.
>> But yes, from what I understand from the code - if the buffer is only
>> partially full and we don't take an exit, the buffer doesn't get back to
>> the host.
>> 
>> ioeventfds and such are making exits less common, so yes - it's possible
>> we won't have an exit in a while.
>> 
>>> If that's how it works then i *really* don't like this, this looks 
>>> like a seriously mis-designed batching feature which might have 
>>> improved a few server benchmarks but which will introduce random, 
>>> hard to debug delays all around the place!
> 
> The proper way to implement batching is not to do it blindly like 
> here, but to do what we do in the TLB coalescing/gather code in the 
> kernel:
> 
> 	gather();
> 
> 	... submit individual TLB flushes ...
> 
> 	flush();
> 
> That's how it should be done here too: each virtio driver that issues 

The world doesn't consist of virtio drivers. It also doesn't consist of only OSs and drivers that we control 100%.

> a group of MMIOs should first start batching, then issue the 
> individual MMIOs and then flush them.
> 
> That can be simplified to leave out the gather() phase, i.e. just 
> issue batched MMIOs and flush them before exiting the virtio (guest 
> side) driver routines.

This acceleration is done to speed up the host kernel<->userspace side. It's completely independent from the guest. If you want to have the guest communicate fast, create an asynchronous ring and process that. And that's what virtio already does today.

> KVM_CAP_COALESCED_MMIO is an unsafe shortcut hack in its current form 
> and it looks completely unsafe.

I haven't tracked the history of it, but I always assumed it was used for repz mov instructions where we already know the size of mmio transactions.


Alex


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 10:39           ` Alexander Graf
@ 2011-06-04 10:47             ` Ingo Molnar
  2011-06-04 10:54               ` Alexander Graf
  0 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2011-06-04 10:47 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Sasha Levin, penberg, kvm, asias.hejun, gorcunov, prasadjoshi124


* Alexander Graf <agraf@suse.de> wrote:

> 
> On 04.06.2011, at 12:35, Ingo Molnar wrote:
> 
> > 
> > * Sasha Levin <levinsasha928@gmail.com> wrote:
> > 
> >> On Sat, 2011-06-04 at 12:17 +0200, Ingo Molnar wrote:
> >>> * Sasha Levin <levinsasha928@gmail.com> wrote:
> >>> 
> >>>> On Sat, 2011-06-04 at 11:38 +0200, Ingo Molnar wrote:
> >>>>> * Sasha Levin <levinsasha928@gmail.com> wrote:
> >>>>> 
> >>>>>> Coalescing MMIO allows us to avoid an exit every time we have a
> >>>>>> MMIO write, instead - MMIO writes are coalesced in a ring which
> >>>>>> can be flushed once an exit for a different reason is needed.
> >>>>>> A MMIO exit is also trigged once the ring is full.
> >>>>>> 
> >>>>>> Coalesce all MMIO regions registered in the MMIO mapper.
> >>>>>> Add a coalescing handler under kvm_cpu.
> >>>>> 
> >>>>> Does this have any effect on latency? I.e. does the guest side 
> >>>>> guarantee that the pending queue will be flushed after a group of 
> >>>>> updates have been done?
> >>>> 
> >>>> Theres nothing that detects groups of MMIO writes, but the ring size is
> >>>> a bit less than PAGE_SIZE (half of it is overhead - rest is data) and
> >>>> we'll exit once the ring is full.
> >>> 
> >>> But if the page is only filled partially and if mmio is not submitted 
> >>> by the guest indefinitely (say it runs a lot of user-space code) then 
> >>> the mmio remains pending in the partial-page buffer?
> >> 
> >> We flush the ring on any exit from the guest, not just MMIO exit.
> >> But yes, from what I understand from the code - if the buffer is only
> >> partially full and we don't take an exit, the buffer doesn't get back to
> >> the host.
> >> 
> >> ioeventfds and such are making exits less common, so yes - it's possible
> >> we won't have an exit in a while.
> >> 
> >>> If that's how it works then i *really* don't like this, this looks 
> >>> like a seriously mis-designed batching feature which might have 
> >>> improved a few server benchmarks but which will introduce random, 
> >>> hard to debug delays all around the place!
> > 
> > The proper way to implement batching is not to do it blindly like 
> > here, but to do what we do in the TLB coalescing/gather code in the 
> > kernel:
> > 
> > 	gather();
> > 
> > 	... submit individual TLB flushes ...
> > 
> > 	flush();
> > 
> > That's how it should be done here too: each virtio driver that issues 
> 
> The world doesn't consist of virtio drivers. It also doesn't 
> consist of only OSs and drivers that we control 100%.

So? I only inquired about latencies, asking what impact on latencies 
is. Regardless of the circumstances we do not want to introduce 
unbound latencies.

If there are no unbound latencies then i'm happy.

> > a group of MMIOs should first start batching, then issue the 
> > individual MMIOs and then flush them.
> > 
> > That can be simplified to leave out the gather() phase, i.e. just 
> > issue batched MMIOs and flush them before exiting the virtio 
> > (guest side) driver routines.
> 
> This acceleration is done to speed up the host kernel<->userspace 
> side.

Yes.

> [...] It's completely independent from the guest. [...]

Well, since user-space gets the MMIOs only once the guest exits it's 
not independent, is it?

> [...] If you want to have the guest communicate fast, create an 
> asynchronous ring and process that. And that's what virtio already 
> does today.
>
> > KVM_CAP_COALESCED_MMIO is an unsafe shortcut hack in its current 
> > form and it looks completely unsafe.
> 
> I haven't tracked the history of it, but I always assumed it was 
> used for repz mov instructions where we already know the size of 
> mmio transactions.

That's why i asked what the effect on latencies is. If there's no 
negative effect then i'm a happy camper.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 10:47             ` Ingo Molnar
@ 2011-06-04 10:54               ` Alexander Graf
  2011-06-04 11:27                 ` Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Alexander Graf @ 2011-06-04 10:54 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Sasha Levin, penberg, kvm, asias.hejun, gorcunov, prasadjoshi124


On 04.06.2011, at 12:47, Ingo Molnar wrote:

> 
> * Alexander Graf <agraf@suse.de> wrote:
> 
>> 
>> On 04.06.2011, at 12:35, Ingo Molnar wrote:
>> 
>>> 
>>> * Sasha Levin <levinsasha928@gmail.com> wrote:
>>> 
>>>> On Sat, 2011-06-04 at 12:17 +0200, Ingo Molnar wrote:
>>>>> * Sasha Levin <levinsasha928@gmail.com> wrote:
>>>>> 
>>>>>> On Sat, 2011-06-04 at 11:38 +0200, Ingo Molnar wrote:
>>>>>>> * Sasha Levin <levinsasha928@gmail.com> wrote:
>>>>>>> 
>>>>>>>> Coalescing MMIO allows us to avoid an exit every time we have a
>>>>>>>> MMIO write, instead - MMIO writes are coalesced in a ring which
>>>>>>>> can be flushed once an exit for a different reason is needed.
>>>>>>>> A MMIO exit is also trigged once the ring is full.
>>>>>>>> 
>>>>>>>> Coalesce all MMIO regions registered in the MMIO mapper.
>>>>>>>> Add a coalescing handler under kvm_cpu.
>>>>>>> 
>>>>>>> Does this have any effect on latency? I.e. does the guest side 
>>>>>>> guarantee that the pending queue will be flushed after a group of 
>>>>>>> updates have been done?
>>>>>> 
>>>>>> Theres nothing that detects groups of MMIO writes, but the ring size is
>>>>>> a bit less than PAGE_SIZE (half of it is overhead - rest is data) and
>>>>>> we'll exit once the ring is full.
>>>>> 
>>>>> But if the page is only filled partially and if mmio is not submitted 
>>>>> by the guest indefinitely (say it runs a lot of user-space code) then 
>>>>> the mmio remains pending in the partial-page buffer?
>>>> 
>>>> We flush the ring on any exit from the guest, not just MMIO exit.
>>>> But yes, from what I understand from the code - if the buffer is only
>>>> partially full and we don't take an exit, the buffer doesn't get back to
>>>> the host.
>>>> 
>>>> ioeventfds and such are making exits less common, so yes - it's possible
>>>> we won't have an exit in a while.
>>>> 
>>>>> If that's how it works then i *really* don't like this, this looks 
>>>>> like a seriously mis-designed batching feature which might have 
>>>>> improved a few server benchmarks but which will introduce random, 
>>>>> hard to debug delays all around the place!
>>> 
>>> The proper way to implement batching is not to do it blindly like 
>>> here, but to do what we do in the TLB coalescing/gather code in the 
>>> kernel:
>>> 
>>> 	gather();
>>> 
>>> 	... submit individual TLB flushes ...
>>> 
>>> 	flush();
>>> 
>>> That's how it should be done here too: each virtio driver that issues 
>> 
>> The world doesn't consist of virtio drivers. It also doesn't 
>> consist of only OSs and drivers that we control 100%.
> 
> So? I only inquired about latencies, asking what impact on latencies 
> is. Regardless of the circumstances we do not want to introduce 
> unbound latencies.
> 
> If there are no unbound latencies then i'm happy.

Sure, I'm just saying that the mechanism was invented for unmodified guests :).

> 
>>> a group of MMIOs should first start batching, then issue the 
>>> individual MMIOs and then flush them.
>>> 
>>> That can be simplified to leave out the gather() phase, i.e. just 
>>> issue batched MMIOs and flush them before exiting the virtio 
>>> (guest side) driver routines.
>> 
>> This acceleration is done to speed up the host kernel<->userspace 
>> side.
> 
> Yes.
> 
>> [...] It's completely independent from the guest. [...]
> 
> Well, since user-space gets the MMIOs only once the guest exits it's 
> not independent, is it?

If we don't know when a guest ends an MMIO stream, we can't optimize it. Period. If we currently optimize random MMIO requests without caring when they finish, the following would simply break:

enable_interrupts();
writel(doorbell, KICK_ME_NOW);
while(1) ;

void interrupt_handler(void)
{
    break_out_of_loop();
}

And since we don't control the guest, we can't guarantee this to not happen. In fact, I'd actually expect this to be a pretty normal boot loader pattern.

> 
>> [...] If you want to have the guest communicate fast, create an 
>> asynchronous ring and process that. And that's what virtio already 
>> does today.
>> 
>>> KVM_CAP_COALESCED_MMIO is an unsafe shortcut hack in its current 
>>> form and it looks completely unsafe.
>> 
>> I haven't tracked the history of it, but I always assumed it was 
>> used for repz mov instructions where we already know the size of 
>> mmio transactions.
> 
> That's why i asked what the effect on latencies is. If there's no 
> negative effect then i'm a happy camper.

Depends on the trade-off really. You don't care about latencies of disabling an IRQ_ENABLED register for example. You do however care about enabling it :).

Since I haven't implemented coalesced mmio on PPC (yet - not sure it's possible or makes sense), I can't really comment too much on it, so I'll leave this to the guys who worked on it.


Alex


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 10:54               ` Alexander Graf
@ 2011-06-04 11:27                 ` Ingo Molnar
  2011-06-04 11:53                   ` Alexander Graf
  0 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2011-06-04 11:27 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Sasha Levin, penberg, kvm, asias.hejun, gorcunov, prasadjoshi124


* Alexander Graf <agraf@suse.de> wrote:

> > So? I only inquired about latencies, asking what impact on 
> > latencies is. Regardless of the circumstances we do not want to 
> > introduce unbound latencies.
> > 
> > If there are no unbound latencies then i'm happy.
> 
> Sure, I'm just saying that the mechanism was invented for 
> unmodified guests :).

Well, but that does not excuse the introduction of unbound latencies. 
(if those latencies are introduced here - i don't know, i'm asking.)

> > Well, since user-space gets the MMIOs only once the guest exits 
> > it's not independent, is it?
> 
> If we don't know when a guest ends an MMIO stream, we can't 
> optimize it. Period. [...]

But that's no excuse. If you cannot optimize them without 
unacceptable collateral damage, then don't optimize it.

That's why i asked what the damage is - if there's any damage.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 11:27                 ` Ingo Molnar
@ 2011-06-04 11:53                   ` Alexander Graf
  2011-06-04 14:46                     ` Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Alexander Graf @ 2011-06-04 11:53 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Sasha Levin, penberg, kvm, asias.hejun, gorcunov, prasadjoshi124


On 04.06.2011, at 13:27, Ingo Molnar wrote:

> 
> * Alexander Graf <agraf@suse.de> wrote:
> 
>>> So? I only inquired about latencies, asking what impact on 
>>> latencies is. Regardless of the circumstances we do not want to 
>>> introduce unbound latencies.
>>> 
>>> If there are no unbound latencies then i'm happy.
>> 
>> Sure, I'm just saying that the mechanism was invented for 
>> unmodified guests :).
> 
> Well, but that does not excuse the introduction of unbound latencies. 
> (if those latencies are introduced here - i don't know, i'm asking.)
> 
>>> Well, since user-space gets the MMIOs only once the guest exits 
>>> it's not independent, is it?
>> 
>> If we don't know when a guest ends an MMIO stream, we can't 
>> optimize it. Period. [...]
> 
> But that's no excuse. If you cannot optimize them without 
> unnacceptable collateral damage then don't optimize it then.
> 
> That's why i asked what the damage is - if there's any damage.

Ok, reading through the source on what coalesced mmio actually does, it seems fairly simple.

  1) the kvm user space user registers a coalesced MMIO region on registers that are not latency crucial
  2) when the guest writes an MMIO, it gets written to a shared page
  3) every time kvm exits its ioctl to user space, the coalesced ring gets analyzed for new data and MMIOs get fired

So the simple rule is: don't register a coalesced MMIO region for a region where latency matters. However, it's great for NVRAM and others where you don't care whether a write actually happens in that moment, as there's no trigger behind it. It might also make sense for selector registers which are meaningless until you ring the doorbell.


Alex


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 11:53                   ` Alexander Graf
@ 2011-06-04 14:46                     ` Ingo Molnar
  2011-06-04 15:22                       ` Alexander Graf
  0 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2011-06-04 14:46 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Sasha Levin, penberg, kvm, asias.hejun, gorcunov, prasadjoshi124


* Alexander Graf <agraf@suse.de> wrote:

> So the simple rule is: don't register a coalesced MMIO region for a 
> region where latency matters. [...]

So my first suspicion is confirmed.

A quick look at Qemu sources shows that lots of drivers are using 
coalesced_mmio without being aware of the latency effects and only 
one seems to make use of qemu_flush_coalesced_mmio_buffer(). Drivers 
like hw/e1000.c sure look latency critical to me.

So i maintain my initial opinion: this is a pretty dangerous 
'optimization' that should be used with extreme care: i can tell 
you with pretty good authority that latency problems are much 
easier to introduce than to find and remove ...

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 14:46                     ` Ingo Molnar
@ 2011-06-04 15:22                       ` Alexander Graf
  2011-06-04 16:34                         ` Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Alexander Graf @ 2011-06-04 15:22 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Sasha Levin, penberg, kvm, asias.hejun, gorcunov, prasadjoshi124


On 04.06.2011, at 16:46, Ingo Molnar wrote:

> 
> * Alexander Graf <agraf@suse.de> wrote:
> 
>> So the simple rule is: don't register a coalesced MMIO region for a 
>> region where latency matters. [...]
> 
> So my first suspicion is confirmed.
> 
> A quick look at Qemu sources shows that lots of drivers are using 
> coalesced_mmio without being aware of the latency effects and only 
> one seems to make use of qemu_flush_coalesced_mmio_buffer(). Drivers 
> like hw/e1000.c sure look latency critical to me.

e1000 maps its NVRAM on coalesced mmio - which is completely ok.

> So i maintain my initial opinion: this is a pretty dangerous 
> 'optimization' that should be used with extreme care: i can tell 
> you with pretty good authority that latency problems are much 
> easier to introduce than to find and remove ...

Yup, which is why it's very sparsely used in qemu :). Basically, it's only e1000 and vga, both of which are heavily used and tested drivers.


Alex


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 15:22                       ` Alexander Graf
@ 2011-06-04 16:34                         ` Ingo Molnar
  2011-06-04 16:50                           ` Sasha Levin
  0 siblings, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2011-06-04 16:34 UTC (permalink / raw)
  To: Alexander Graf
  Cc: Sasha Levin, penberg, kvm, asias.hejun, gorcunov, prasadjoshi124


* Alexander Graf <agraf@suse.de> wrote:

> 
> On 04.06.2011, at 16:46, Ingo Molnar wrote:
> 
> > 
> > * Alexander Graf <agraf@suse.de> wrote:
> > 
> >> So the simple rule is: don't register a coalesced MMIO region for a 
> >> region where latency matters. [...]
> > 
> > So my first suspicion is confirmed.
> > 
> > A quick look at Qemu sources shows that lots of drivers are using 
> > coalesced_mmio without being aware of the latency effects and only 
> > one seems to make use of qemu_flush_coalesced_mmio_buffer(). Drivers 
> > like hw/e1000.c sure look latency critical to me.
> 
> e1000 maps its NVRAM on coalesced mmio - which is completely ok.

Ok!

> > So i maintain my initial opinion: this is a pretty dangerous 
> > 'optimization' that should be used with extreme care: i can tell 
> > it you with pretty good authority that latency problems are much 
> > more easy to introduce than to find and remove ...
> 
> Yup, which is why it's very sparsely used in qemu :). Basically, 
> it's only e1000 and vga, both of which are heavily used and tested 
> drivers.

Ok, so this change in:

 commit 73389b5ea017288a949ae27536c8cfd298d3e317
 Author: Sasha Levin <levinsasha928@gmail.com>
 Date:   Fri Jun 3 22:51:08 2011 +0300

    kvm tools: Add MMIO coalescing support

@@ -67,6 +70,16 @@ bool kvm__register_mmio(u64 phys_addr, u64 phys_addr_len, void (*kvm_mmio_callba
                .kvm_mmio_callback_fn = kvm_mmio_callback_fn,
        };
 
+       zone = (struct kvm_coalesced_mmio_zone) {
+               .addr   = phys_addr,
+               .size   = phys_addr_len,
+       };
+       ret = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
+       if (ret < 0) {
+               free(mmio);
+               return false;
+       }

Seems completely wrong, because it indiscriminately registers *all* 
mmio regions as coalesced ones.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] kvm tools: Add MMIO coalescing support
  2011-06-04 16:34                         ` Ingo Molnar
@ 2011-06-04 16:50                           ` Sasha Levin
  0 siblings, 0 replies; 15+ messages in thread
From: Sasha Levin @ 2011-06-04 16:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Alexander Graf, penberg, kvm, asias.hejun, gorcunov,
	prasadjoshi124

On Sat, 2011-06-04 at 18:34 +0200, Ingo Molnar wrote:
> * Alexander Graf <agraf@suse.de> wrote:
> 
> > 
> > On 04.06.2011, at 16:46, Ingo Molnar wrote:
> > 
> > > 
> > > * Alexander Graf <agraf@suse.de> wrote:
> > > 
> > >> So the simple rule is: don't register a coalesced MMIO region for a 
> > >> region where latency matters. [...]
> > > 
> > > So my first suspicion is confirmed.
> > > 
> > > A quick look at Qemu sources shows that lots of drivers are using 
> > > coalesced_mmio without being aware of the latency effects and only 
> > > one seems to make use of qemu_flush_coalesced_mmio_buffer(). Drivers 
> > > like hw/e1000.c sure look latency critical to me.
> > 
> > e1000 maps its NVRAM on coalesced mmio - which is completely ok.
> 
> Ok!
> 
> > > So i maintain my initial opinion: this is a pretty dangerous 
> > > 'optimization' that should be used with extreme care: i can 
> > > tell you with pretty good authority that latency problems are 
> > > much easier to introduce than to find and remove ...
> > 
> > Yup, which is why it's very sparsely used in qemu :). Basically, 
> > it's only e1000 and vga, both of which are heavily used and tested 
> > drivers.
> 
> Ok, so this change in:
> 
>  commit 73389b5ea017288a949ae27536c8cfd298d3e317
>  Author: Sasha Levin <levinsasha928@gmail.com>
>  Date:   Fri Jun 3 22:51:08 2011 +0300
> 
>     kvm tools: Add MMIO coalescing support
> 
> @@ -67,6 +70,16 @@ bool kvm__register_mmio(u64 phys_addr, u64 phys_addr_len, void (*kvm_mmio_callba
>                 .kvm_mmio_callback_fn = kvm_mmio_callback_fn,
>         };
>  
> +       zone = (struct kvm_coalesced_mmio_zone) {
> +               .addr   = phys_addr,
> +               .size   = phys_addr_len,
> +       };
> +       ret = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
> +       if (ret < 0) {
> +               free(mmio);
> +               return false;
> +       }
> 
> Seems completely wrong, because it indiscriminately registers *all* 
> mmio regions as coalesced ones.

Yes. I'll add a flag instead of making all of them coalesced.

-- 

Sasha.


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2011-06-04 16:50 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-06-03 19:51 [PATCH] kvm tools: Add MMIO coalescing support Sasha Levin
2011-06-04  9:38 ` Ingo Molnar
2011-06-04 10:14   ` Sasha Levin
2011-06-04 10:17     ` Ingo Molnar
2011-06-04 10:28       ` Sasha Levin
2011-06-04 10:35         ` Ingo Molnar
2011-06-04 10:39           ` Alexander Graf
2011-06-04 10:47             ` Ingo Molnar
2011-06-04 10:54               ` Alexander Graf
2011-06-04 11:27                 ` Ingo Molnar
2011-06-04 11:53                   ` Alexander Graf
2011-06-04 14:46                     ` Ingo Molnar
2011-06-04 15:22                       ` Alexander Graf
2011-06-04 16:34                         ` Ingo Molnar
2011-06-04 16:50                           ` Sasha Levin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox