public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] Batch writes to MMIO
@ 2008-04-23 13:07 Laurent Vivier
  2008-04-23 13:07 ` [PATCH 1/2] kvm: " Laurent Vivier
  2008-04-23 14:05 ` [PATCH 0/2] " Avi Kivity
  0 siblings, 2 replies; 20+ messages in thread
From: Laurent Vivier @ 2008-04-23 13:07 UTC (permalink / raw)
  To: kvm-devel; +Cc: Laurent Vivier


These two patches allow batching writes to MMIO.

When the kernel has to send MMIO writes to userspace, it stores them
in memory until it has to pass the hand to userspace for another
reason. This avoids having too many context switches on operations
that can wait.

WARNING: this breaks compatibility with old userspace part.

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>

[PATCH 1/2] kvm: Batch writes to MMIO
        - kernel part

[PATCH 2/2] kvm-userspace: Batch writes to MMIO
        - userspace part

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>

-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/2] kvm: Batch writes to MMIO
  2008-04-23 13:07 [PATCH 0/2] Batch writes to MMIO Laurent Vivier
@ 2008-04-23 13:07 ` Laurent Vivier
  2008-04-23 13:07   ` [PATCH 2/2] kvm-userspace: " Laurent Vivier
                     ` (2 more replies)
  2008-04-23 14:05 ` [PATCH 0/2] " Avi Kivity
  1 sibling, 3 replies; 20+ messages in thread
From: Laurent Vivier @ 2008-04-23 13:07 UTC (permalink / raw)
  To: kvm-devel; +Cc: Laurent Vivier

This patch is the kernel part of the "batch writes to MMIO" patch.

When the kernel has to send MMIO writes to userspace, it stores them
in memory until it has to pass the hand to userspace for another
reason. This avoids having too many context switches on operations
that can wait.

WARNING: this breaks compatibility with old userspace part.

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
---
 arch/x86/kvm/x86.c         |   21 +++++++++++++++++++++
 include/asm-x86/kvm_host.h |    2 ++
 include/linux/kvm.h        |   10 +++++++++-
 virt/kvm/kvm_main.c        |    3 +++
 4 files changed, 35 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0ce5563..3881056 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2942,8 +2942,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		kvm_x86_ops->decache_regs(vcpu);
 	}
 
+batch:
 	r = __vcpu_run(vcpu, kvm_run);
 
+	if (!r && vcpu->mmio_is_write &&
+	    kvm_run->exit_reason == KVM_EXIT_MMIO &&
+	    kvm_run->batch_count < KVM_MAX_BATCH) {
+		struct kvm_batch *batch = vcpu->arch.batch_data;
+		int i = kvm_run->batch_count++;
+
+		batch[i].phys_addr = vcpu->mmio_phys_addr;
+		batch[i].len = vcpu->mmio_size;
+		memcpy(batch[i].data, vcpu->mmio_data, batch[i].len);
+
+		goto batch;
+	}
 out:
 	if (vcpu->sigset_active)
 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -3830,6 +3843,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	}
 	vcpu->arch.pio_data = page_address(page);
 
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page) {
+		r = -ENOMEM;
+		goto fail;
+	}
+	vcpu->arch.batch_data = page_address(page);
+
 	r = kvm_mmu_create(vcpu);
 	if (r < 0)
 		goto fail_free_pio_data;
@@ -3857,6 +3877,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvm_mmu_destroy(vcpu);
 	up_read(&vcpu->kvm->slots_lock);
 	free_page((unsigned long)vcpu->arch.pio_data);
+	free_page((unsigned long)vcpu->arch.batch_data);
 }
 
 struct  kvm *kvm_arch_create_vm(void)
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 9d963cd..2824652 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -26,6 +26,7 @@
 #define KVM_PRIVATE_MEM_SLOTS 4
 
 #define KVM_PIO_PAGE_OFFSET 1
+#define KVM_MMIO_PAGE_OFFSET 2
 
 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
 #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
@@ -255,6 +256,7 @@ struct kvm_vcpu_arch {
 	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
 	void *pio_data;
+	void *batch_data;
 
 	struct kvm_queued_exception {
 		bool pending;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a281afe..cf0d266 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -94,7 +94,8 @@ struct kvm_run {
 	__u32 exit_reason;
 	__u8 ready_for_interrupt_injection;
 	__u8 if_flag;
-	__u8 padding2[2];
+	__u8 batch_count;
+	__u8 padding2;
 
 	/* in (pre_kvm_run), out (post_kvm_run) */
 	__u64 cr8;
@@ -173,6 +174,13 @@ struct kvm_run {
 	};
 };
 
+#define KVM_MAX_BATCH (PAGE_SIZE / sizeof(struct kvm_batch))
+struct kvm_batch {
+	__u64 phys_addr;
+	__u32 len;
+	__u8  data[8];
+};
+
 /* for KVM_TRANSLATE */
 struct kvm_translation {
 	/* in */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d3cb4cc..b2234b3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -796,6 +796,8 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 #ifdef CONFIG_X86
 	else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET)
 		page = virt_to_page(vcpu->arch.pio_data);
+	else if (vmf->pgoff == KVM_MMIO_PAGE_OFFSET)
+		page = virt_to_page(vcpu->arch.batch_data);
 #endif
 	else
 		return VM_FAULT_SIGBUS;
@@ -1214,6 +1216,7 @@ static long kvm_dev_ioctl(struct file *filp,
 		r = PAGE_SIZE;     /* struct kvm_run */
 #ifdef CONFIG_X86
 		r += PAGE_SIZE;    /* pio data page */
+		r += PAGE_SIZE;    /* mmio batch page */
 #endif
 		break;
 	case KVM_TRACE_ENABLE:
-- 
1.5.2.4


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/2] kvm-userspace: Batch writes to MMIO
  2008-04-23 13:07 ` [PATCH 1/2] kvm: " Laurent Vivier
@ 2008-04-23 13:07   ` Laurent Vivier
  2008-04-23 14:31   ` [PATCH 1/2] kvm: " Avi Kivity
  2008-04-23 16:53   ` Anthony Liguori
  2 siblings, 0 replies; 20+ messages in thread
From: Laurent Vivier @ 2008-04-23 13:07 UTC (permalink / raw)
  To: kvm-devel; +Cc: Laurent Vivier

This patch is userspace part of the "batch writes to MMIO" patch.

When the kernel has to send MMIO writes to userspace, it stores them
in memory until it has to pass the hand to userspace for another
reason. This avoids having too many context switches on operations
that can wait.

Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
---
 libkvm/libkvm.c |   17 +++++++++++++++++
 1 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index 329f29f..be74477 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -864,6 +864,10 @@ int kvm_run(kvm_context_t kvm, int vcpu)
 	int r;
 	int fd = kvm->vcpu_fd[vcpu];
 	struct kvm_run *run = kvm->run[vcpu];
+#if defined(__x86_64__) || defined(__i386__)
+	struct kvm_batch *batch = (void *)run + 2 * PAGE_SIZE;
+	int i;
+#endif
 
 again:
 	if (!kvm->irqchip_in_kernel)
@@ -882,6 +886,19 @@ again:
 
 	post_kvm_run(kvm, vcpu);
 
+#if defined(__x86_64__) || defined(__i386__)
+	for (i = 0; i < run->batch_count; i++) {
+		if ((batch[i].phys_addr > 0xa0000-4 &&
+		     batch[i].phys_addr <= 0xa0000) && batch[i].len == 3)
+			continue;
+		kvm->callbacks->mmio_write(kvm->opaque,
+					   batch[i].phys_addr,
+					   &batch[i].data[0], batch[i].len);
+
+	}
+	run->batch_count = 0;
+#endif
+
 	if (r == -1) {
 		r = handle_io_window(kvm);
 		goto more;
-- 
1.5.2.4


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 13:07 [PATCH 0/2] Batch writes to MMIO Laurent Vivier
  2008-04-23 13:07 ` [PATCH 1/2] kvm: " Laurent Vivier
@ 2008-04-23 14:05 ` Avi Kivity
  2008-04-23 14:22   ` Laurent Vivier
  1 sibling, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-04-23 14:05 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: kvm-devel

Laurent Vivier wrote:
> These two patches allow to batch writes to MMIO.
>
> When kernel has to send MMIO writes to userspace, it stores them
> in memory until it has to pass the hand to userspace for another
> reason. This avoids to have too many context switches on operations
> that can wait.
>
>   

Did you obtain any measurable performance benefit?

> WARNING: this breaks compatibility with old userspace part.
>
>   

So it's just an RFC :)

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 14:05 ` [PATCH 0/2] " Avi Kivity
@ 2008-04-23 14:22   ` Laurent Vivier
  2008-04-23 14:40     ` Avi Kivity
  0 siblings, 1 reply; 20+ messages in thread
From: Laurent Vivier @ 2008-04-23 14:22 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel


Le mercredi 23 avril 2008 à 17:05 +0300, Avi Kivity a écrit :
> Laurent Vivier wrote:
> > These two patches allow to batch writes to MMIO.
> >
> > When kernel has to send MMIO writes to userspace, it stores them
> > in memory until it has to pass the hand to userspace for another
> > reason. This avoids to have too many context switches on operations
> > that can wait.
> >
> >   
> 
> Did you obtain any measurable performance benefit?

Well, the problem is how to measure it. Really, I don't know.

But when I add traces I saw MMIO writes are batched: by group of 170
(this is the max in a page) at the beginning, and by group of 10 with XP
when we move a window.

So all comments are welcome...

> > WARNING: this breaks compatibility with old userspace part.
> >
> >   
> 
> So it's just an RFC :)

Yes... to have some comments how to manage this :)

Laurent
-- 
------------- Laurent.Vivier@bull.net ---------------
"The best way to predict the future is to invent it."
- Alan Kay


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] kvm: Batch writes to MMIO
  2008-04-23 13:07 ` [PATCH 1/2] kvm: " Laurent Vivier
  2008-04-23 13:07   ` [PATCH 2/2] kvm-userspace: " Laurent Vivier
@ 2008-04-23 14:31   ` Avi Kivity
  2008-04-23 14:59     ` Laurent Vivier
  2008-04-23 16:53   ` Anthony Liguori
  2 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-04-23 14:31 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: kvm-devel

Laurent Vivier wrote:
> This patch is the kernel part of the "batch writes to MMIO" patch.
>
> When kernel has to send MMIO writes to userspace, it stores them
> in memory until it has to pass the hand to userspace for another
> reason. This avoids to have too many context switches on operations
> that can wait.
>
> WARNING: this breaks compatibility with old userspace part.
>
> Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
> ---
>  arch/x86/kvm/x86.c         |   21 +++++++++++++++++++++
>  include/asm-x86/kvm_host.h |    2 ++
>  include/linux/kvm.h        |   10 +++++++++-
>  virt/kvm/kvm_main.c        |    3 +++
>  4 files changed, 35 insertions(+), 1 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0ce5563..3881056 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2942,8 +2942,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  		kvm_x86_ops->decache_regs(vcpu);
>  	}
>  
> +batch:
>  	r = __vcpu_run(vcpu, kvm_run);
>  
> +	if (!r && vcpu->mmio_is_write &&
> +	    kvm_run->exit_reason == KVM_EXIT_MMIO &&
> +	    kvm_run->batch_count < KVM_MAX_BATCH) {
> +		struct kvm_batch *batch = vcpu->arch.batch_data;
> +		int i = kvm_run->batch_count++;
> +
> +		batch[i].phys_addr = vcpu->mmio_phys_addr;
> +		batch[i].len = vcpu->mmio_size;
> +		memcpy(batch[i].data, vcpu->mmio_data, batch[i].len);
>   

This breaks ordering on smp guests. batch_data needs to be a kvm thing, 
not a vcpu thing, and locked, of course.

Also, you don't want to queue writes which trigger I/O since that will 
affect latency.  Userspace should tell the kernel which mmio addresses 
are queuable.

> +
> +		goto batch;
>   

If you move this to within __vcpu_run, you won't need to loop.  Maybe 
the best place is where we actually decide it's an mmio write.

You also need to flush the queue each time you have an in-kernel mmio 
write.  For example:

vcpu0             vcpu1

mmio write (queued)
apic write -> IPI
                     doesn't see effects of write

> +	}
>  out:
>  	if (vcpu->sigset_active)
>  		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> @@ -3830,6 +3843,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>  	}
>  	vcpu->arch.pio_data = page_address(page);
>  
> +	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +	if (!page) {
> +		r = -ENOMEM;
> +		goto fail;
> +	}
> +	vcpu->arch.batch_data = page_address(page);
> +
>  	r = kvm_mmu_create(vcpu);
>  	if (r < 0)
>  		goto fail_free_pio_data;
> @@ -3857,6 +3877,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
>  	kvm_mmu_destroy(vcpu);
>  	up_read(&vcpu->kvm->slots_lock);
>  	free_page((unsigned long)vcpu->arch.pio_data);
> +	free_page((unsigned long)vcpu->arch.batch_data);
>  }
>  
>  #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
>  #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
> @@ -255,6 +256,7 @@ struct kvm_vcpu_arch {
>  	gva_t mmio_fault_cr2;
>  	struct kvm_pio_request pio;
>  	void *pio_data;
> +	void *batch_data;
>  
>   

It's an array of structs, no?  So it shouldn't be a void *.

>  
> +#define KVM_MAX_BATCH (PAGE_SIZE / sizeof(struct kvm_batch))
> +struct kvm_batch {
> +	__u64 phys_addr;
> +	__u32 len;
> +	__u8  data[8];
> +};
>   

Size is 24 on 64-bit and 20 on 32-bit.  Please pad (after len, so data 
is nicely aligned).


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 14:22   ` Laurent Vivier
@ 2008-04-23 14:40     ` Avi Kivity
  2008-04-23 15:10       ` Anthony Liguori
  0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-04-23 14:40 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: kvm-devel

Laurent Vivier wrote:
> Le mercredi 23 avril 2008 à 17:05 +0300, Avi Kivity a écrit :
>   
>> Laurent Vivier wrote:
>>     
>>> These two patches allow to batch writes to MMIO.
>>>
>>> When kernel has to send MMIO writes to userspace, it stores them
>>> in memory until it has to pass the hand to userspace for another
>>> reason. This avoids to have too many context switches on operations
>>> that can wait.
>>>
>>>   
>>>       
>> Did you obtain any measurable performance benefit?
>>     
>
> Well, the problem is how to measure it. Really, I don't know.
>
> But when I add traces I saw MMIO writes are batched: by group of 170
> (this is the max in a page) at the beginning,

That's the lovely animation. You can time XP startup, or look at system 
time in 'top' once it stabilizes.  Also compare total host_state_reloads 
to boot in kvm_stat (each costs 5 usec, after discount).

>  and by group of 10 with XP
> when we move a window.
>   

That's probably programming the cirrus blitter, also a candidate for 
batching, but probably not measurable.  Maybe one of those Windows 
graphics benchmarks which draw tons of rectangles very fast and cause 
epilepsy seizures.

I guess most interesting is e1000, if we need to do more than one mmio 
to start transmitting.


-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] kvm: Batch writes to MMIO
  2008-04-23 14:31   ` [PATCH 1/2] kvm: " Avi Kivity
@ 2008-04-23 14:59     ` Laurent Vivier
  2008-04-23 15:04       ` Avi Kivity
  0 siblings, 1 reply; 20+ messages in thread
From: Laurent Vivier @ 2008-04-23 14:59 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel


Le mercredi 23 avril 2008 à 17:31 +0300, Avi Kivity a écrit :
> Laurent Vivier wrote:
> > This patch is the kernel part of the "batch writes to MMIO" patch.
> >
> > When kernel has to send MMIO writes to userspace, it stores them
> > in memory until it has to pass the hand to userspace for another
> > reason. This avoids to have too many context switches on operations
> > that can wait.
> >
> > WARNING: this breaks compatibility with old userspace part.
> >
> > Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
> > ---
> >  arch/x86/kvm/x86.c         |   21 +++++++++++++++++++++
> >  include/asm-x86/kvm_host.h |    2 ++
> >  include/linux/kvm.h        |   10 +++++++++-
> >  virt/kvm/kvm_main.c        |    3 +++
> >  4 files changed, 35 insertions(+), 1 deletions(-)
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 0ce5563..3881056 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -2942,8 +2942,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> >  		kvm_x86_ops->decache_regs(vcpu);
> >  	}
> >  
> > +batch:
> >  	r = __vcpu_run(vcpu, kvm_run);
> >  
> > +	if (!r && vcpu->mmio_is_write &&
> > +	    kvm_run->exit_reason == KVM_EXIT_MMIO &&
> > +	    kvm_run->batch_count < KVM_MAX_BATCH) {
> > +		struct kvm_batch *batch = vcpu->arch.batch_data;
> > +		int i = kvm_run->batch_count++;
> > +
> > +		batch[i].phys_addr = vcpu->mmio_phys_addr;
> > +		batch[i].len = vcpu->mmio_size;
> > +		memcpy(batch[i].data, vcpu->mmio_data, batch[i].len);
> >   
> 
> This breaks ordering on smp guests. batch_data needs to be a kvm thing, 
> not a vcpu thing, and locked, of course.

- is ordering between vcpu important when we already delay operations ?
- using vcpu avoids the lock
- Why PIO (pio_data) are vcpu things and not kvm things, then ?

> Also, you don't want to queue writes which trigger I/O since that will 
> affect latency.  Userspace should tell the kernel which mmio addresses 
> are queuable.

I agree (but in my first patch it was easier to ignore this)

> > +
> > +		goto batch;
> >   
> 
> If you move this to within __vcpu_run, you won't need to loop.  Maybe 
> the best place is where we actually decide it's an mmio write.

I agree

> You also need to flush the queue each time you have an in-kernel mmio 
> write.  For example:
> 
> vcpu0             vcpu1
> 
> mmio write (queued)
> apic write -> IPI
>                      doesn't see effects of write

I agree

> > +	}
> >  out:
> >  	if (vcpu->sigset_active)
> >  		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
> > @@ -3830,6 +3843,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> >  	}
> >  	vcpu->arch.pio_data = page_address(page);
> >  
> > +	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> > +	if (!page) {
> > +		r = -ENOMEM;
> > +		goto fail;
> > +	}
> > +	vcpu->arch.batch_data = page_address(page);
> > +
> >  	r = kvm_mmu_create(vcpu);
> >  	if (r < 0)
> >  		goto fail_free_pio_data;
> > @@ -3857,6 +3877,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
> >  	kvm_mmu_destroy(vcpu);
> >  	up_read(&vcpu->kvm->slots_lock);
> >  	free_page((unsigned long)vcpu->arch.pio_data);
> > +	free_page((unsigned long)vcpu->arch.batch_data);
> >  }
> >  
> >  #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1)
> >  #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD))
> > @@ -255,6 +256,7 @@ struct kvm_vcpu_arch {
> >  	gva_t mmio_fault_cr2;
> >  	struct kvm_pio_request pio;
> >  	void *pio_data;
> > +	void *batch_data;
> >  
> >   
> 
> It's an array of structs, no?  So it shouldn't be a void *.

Yes (I love cut&paste...)

> >  
> > +#define KVM_MAX_BATCH (PAGE_SIZE / sizeof(struct kvm_batch))
> > +struct kvm_batch {
> > +	__u64 phys_addr;
> > +	__u32 len;
> > +	__u8  data[8];
> > +};
> >   
> 
> Size is 24 on 64-bit and 20 on 32-bit.  Please pad (after len, so data 
> is nicely aligned).

OK

Thank you, 
Laurent
-- 
------------- Laurent.Vivier@bull.net ---------------
"The best way to predict the future is to invent it."
- Alan Kay


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] kvm: Batch writes to MMIO
  2008-04-23 14:59     ` Laurent Vivier
@ 2008-04-23 15:04       ` Avi Kivity
  0 siblings, 0 replies; 20+ messages in thread
From: Avi Kivity @ 2008-04-23 15:04 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: kvm-devel

Laurent Vivier wrote:
>>>   
>>>       
>> This breaks ordering on smp guests. batch_data needs to be a kvm thing, 
>> not a vcpu thing, and locked, of course.
>>     
>
> - is ordering between vcpu important when we already delay operations ?
>   

Yes.  Ordering is very different from delaying.  Also, we will only 
delay writes which don't matter (those that simply latch something into 
a register, which is later read).

> - using vcpu avoids the lock
>   

Correctness beats performance

> - Why PIO (pio_data) are vcpu things and not kvm things, then ?
>   

pio batching is used for just a single instruction that can work with 
batches (ins/outs), so a guest can't expect to see partial output.

With mmio, you can have

1. mmio
2. raise flag in shared memory
3. other vcpu sees flag, does mmio
4. spin-wait for ack from other vcpu
5. mmio

You can't do that with ins/outs.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 14:40     ` Avi Kivity
@ 2008-04-23 15:10       ` Anthony Liguori
  2008-04-23 15:12         ` Avi Kivity
  2008-04-23 16:24         ` Laurent Vivier
  0 siblings, 2 replies; 20+ messages in thread
From: Anthony Liguori @ 2008-04-23 15:10 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel, Laurent Vivier

Avi Kivity wrote:
> Laurent Vivier wrote:
>   
> That's the lovely animation. You can time XP startup, or look at system 
> time in 'top' once it stabilizes.  Also compare total host_state_reloads 
> to boot in kvm_stat (each costs 5 usec, after discount).
>   

Win2k's boot up animation (the moving bar) is done in VGA planar mode.  
You should be able to time the difference in startup.

>>  and by group of 10 with XP
>> when we move a window.
>>   
>>     
>
> That's probably programming the cirrus blitter, also a candidate for 
> batching, but probably not measurable.  Maybe one of those Windows 
> graphics benchmarks which draw tons of rectangles very fast and cause 
> epilepsy seizures.
>
> I guess most interesting is e1000, if we need to do more than one mmio 
> to start transmitting.
>   

The ne2k is pretty mmio heavy.  You should be able to observe a boost 
with something like iperf (guest=>host) I would think if this is a real 
savings.

Regards,

Anthony Liguori



-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 15:10       ` Anthony Liguori
@ 2008-04-23 15:12         ` Avi Kivity
  2008-04-23 15:47           ` Anthony Liguori
  2008-04-23 16:24         ` Laurent Vivier
  1 sibling, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-04-23 15:12 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel, Laurent Vivier

Anthony Liguori wrote:
>
> The ne2k is pretty mmio heavy.  You should be able to observe a boost 
> with something like iperf (guest=>host) I would think if this is a 
> real savings.
>

If we're just improving ne2k, the complexity isn't worth it.  We have 
two better nics which are widely supported in guests.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 15:12         ` Avi Kivity
@ 2008-04-23 15:47           ` Anthony Liguori
  2008-04-23 17:13             ` Avi Kivity
  0 siblings, 1 reply; 20+ messages in thread
From: Anthony Liguori @ 2008-04-23 15:47 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel, Laurent Vivier

Avi Kivity wrote:
> Anthony Liguori wrote:
>>
>> The ne2k is pretty mmio heavy.  You should be able to observe a boost 
>> with something like iperf (guest=>host) I would think if this is a 
>> real savings.
>>
>
> If we're just improving ne2k, the complexity isn't worth it.  We have 
> two better nics which are widely supported in guests.

Sure, but the ne2k should be pretty close to an optimal synthetic 
benchmark for batching.  If we don't see a big improvement with it, 
we're probably not going to see a big improvement with it for anything.

As you point out though, the converse isn't necessarily true.

Regards,

Anthony Liguori


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 15:10       ` Anthony Liguori
  2008-04-23 15:12         ` Avi Kivity
@ 2008-04-23 16:24         ` Laurent Vivier
  2008-04-23 16:25           ` Avi Kivity
  1 sibling, 1 reply; 20+ messages in thread
From: Laurent Vivier @ 2008-04-23 16:24 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel, Avi Kivity

Le mercredi 23 avril 2008 à 10:10 -0500, Anthony Liguori a écrit :
[...]
> The ne2k is pretty mmio heavy.  You should be able to observe a boost 
> with something like iperf (guest=>host) I would think if this is a real 
> savings.

I like your advice :-D

I use iperf with e1000 emulation and a slightly modified patch (to
detect MMIO write in a loop), server is on the host, client on the
guest, with default values.

RESULT WITHOUT BATCHING:

[  4]  0.0-10.0 sec    235 MBytes    197 Mbits/sec
[  5]  0.0-10.0 sec    194 MBytes    163 Mbits/sec
[  4]  0.0-10.0 sec    185 MBytes    155 Mbits/sec
[  5]  0.0-10.0 sec    227 MBytes    190 Mbits/sec
[  4]  0.0-10.0 sec    196 MBytes    164 Mbits/sec
[  5]  0.0-10.0 sec    194 MBytes    163 Mbits/sec
[  4]  0.0-10.0 sec    184 MBytes    154 Mbits/sec

RESULT WITH BATCHING:

------------------------------------------------------------
Server listening on TCP port 5001
TCP window size: 85.3 KByte (default)
------------------------------------------------------------
[  4]  0.0-10.0 sec    357 MBytes    299 Mbits/sec
[  5]  0.0-10.1 sec    418 MBytes    347 Mbits/sec
[  4]  0.0-10.0 sec    408 MBytes    342 Mbits/sec
[  5]  0.0-10.0 sec    422 MBytes    353 Mbits/sec
[  4]  0.0-10.1 sec    436 MBytes    362 Mbits/sec
[  5]  0.0-10.0 sec    416 MBytes    348 Mbits/sec
[  4]  0.0-10.0 sec    431 MBytes    361 Mbits/sec

Well, it's nice, isn't it?

Laurent
-- 
------------- Laurent.Vivier@bull.net ---------------
"The best way to predict the future is to invent it."
- Alan Kay


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 16:24         ` Laurent Vivier
@ 2008-04-23 16:25           ` Avi Kivity
  2008-04-23 16:41             ` Laurent Vivier
  0 siblings, 1 reply; 20+ messages in thread
From: Avi Kivity @ 2008-04-23 16:25 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: kvm-devel

Laurent Vivier wrote:
> Le mercredi 23 avril 2008 à 10:10 -0500, Anthony Liguori a écrit :
> [...]
>   
>> The ne2k is pretty mmio heavy.  You should be able to observe a boost 
>> with something like iperf (guest=>host) I would think if this is a real 
>> savings.
>>     
>
> I like your advices :-D
>
> I use iperf with e1000 emulation and a slightly modified patch (to
> detect MMIO write in a loop), server is on the host, client on the
> guest, with default values.
>
> RESULT WITHOUT BATCHING:
>
> [  4]  0.0-10.0 sec    235 MBytes    197 Mbits/sec
> [  5]  0.0-10.0 sec    194 MBytes    163 Mbits/sec
> [  4]  0.0-10.0 sec    185 MBytes    155 Mbits/sec
> [  5]  0.0-10.0 sec    227 MBytes    190 Mbits/sec
> [  4]  0.0-10.0 sec    196 MBytes    164 Mbits/sec
> [  5]  0.0-10.0 sec    194 MBytes    163 Mbits/sec
> [  4]  0.0-10.0 sec    184 MBytes    154 Mbits/sec
>
> RESULT WITH BATCHING:
>
> ------------------------------------------------------------
> Server listening on TCP port 5001
> TCP window size: 85.3 KByte (default)
> ------------------------------------------------------------
> [  4]  0.0-10.0 sec    357 MBytes    299 Mbits/sec
> [  5]  0.0-10.1 sec    418 MBytes    347 Mbits/sec
> [  4]  0.0-10.0 sec    408 MBytes    342 Mbits/sec
> [  5]  0.0-10.0 sec    422 MBytes    353 Mbits/sec
> [  4]  0.0-10.1 sec    436 MBytes    362 Mbits/sec
> [  5]  0.0-10.0 sec    416 MBytes    348 Mbits/sec
> [  4]  0.0-10.0 sec    431 MBytes    361 Mbits/sec
>
> Well, it's nice ?
>   

It's too good to be true.

I think we're seeing two bugs cancel each other out, resulting in a 
performance gain.  Linux doesn't know how to queue outgoing packets, so 
it bangs on the mmio that starts the transmit after every packet.  mmio 
batching doesn't know that this mmio register is critical for latency, 
so it queues it up.  The result is that you get not just mmio 
batching, but also packet batching!  Which dramatically improves 
performance at the expense of latency.


Sorry (if it's true :)

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 16:25           ` Avi Kivity
@ 2008-04-23 16:41             ` Laurent Vivier
  2008-04-23 16:48               ` Anthony Liguori
  0 siblings, 1 reply; 20+ messages in thread
From: Laurent Vivier @ 2008-04-23 16:41 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel


Le mercredi 23 avril 2008 à 19:25 +0300, Avi Kivity a écrit :
> Laurent Vivier wrote:
> > Le mercredi 23 avril 2008 à 10:10 -0500, Anthony Liguori a écrit :
> > [...]
> >   
> >> The ne2k is pretty mmio heavy.  You should be able to observe a boost 
> >> with something like iperf (guest=>host) I would think if this is a real 
> >> savings.
> >>     
> >
> > I like your advices :-D
> >
> > I use iperf with e1000 emulation and a slightly modified patch (to
> > detect MMIO write in a loop), server is on the host, client on the
> > guest, with default values.
> >
> > RESULT WITHOUT BATCHING:
> >
> > [  4]  0.0-10.0 sec    235 MBytes    197 Mbits/sec
> > [  5]  0.0-10.0 sec    194 MBytes    163 Mbits/sec
> > [  4]  0.0-10.0 sec    185 MBytes    155 Mbits/sec
> > [  5]  0.0-10.0 sec    227 MBytes    190 Mbits/sec
> > [  4]  0.0-10.0 sec    196 MBytes    164 Mbits/sec
> > [  5]  0.0-10.0 sec    194 MBytes    163 Mbits/sec
> > [  4]  0.0-10.0 sec    184 MBytes    154 Mbits/sec
> >
> > RESULT WITH BATCHING:
> >
> > ------------------------------------------------------------
> > Server listening on TCP port 5001
> > TCP window size: 85.3 KByte (default)
> > ------------------------------------------------------------
> > [  4]  0.0-10.0 sec    357 MBytes    299 Mbits/sec
> > [  5]  0.0-10.1 sec    418 MBytes    347 Mbits/sec
> > [  4]  0.0-10.0 sec    408 MBytes    342 Mbits/sec
> > [  5]  0.0-10.0 sec    422 MBytes    353 Mbits/sec
> > [  4]  0.0-10.1 sec    436 MBytes    362 Mbits/sec
> > [  5]  0.0-10.0 sec    416 MBytes    348 Mbits/sec
> > [  4]  0.0-10.0 sec    431 MBytes    361 Mbits/sec
> >
> > Well, it's nice ?
> >   
> 
> It's too good to be true.
> 
> I think we're seeing two bugs cancel each other out, resulting in a 
> performance gain.  Linux doesn't know how to queue outgoing packets, so 
> it bangs on the mmio that starts the transmit after every packet.  mmio 
> batching doesn't know that this mmio register is critical for latency, 
> so it queues it up.  The result is that you get not just mmio 
> batching, but also packet batching!  Which dramatically improves 
> performance at the expense of latency.

How can I check that ? How can I measure latency ?
Perhaps I can swap server and client between guest and host ?

> 
> Sorry (if it's true :)
>

Thank you for your help,
Laurent
-- 
------------- Laurent.Vivier@bull.net ---------------
"The best way to predict the future is to invent it."
- Alan Kay


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 16:41             ` Laurent Vivier
@ 2008-04-23 16:48               ` Anthony Liguori
  2008-04-23 16:51                 ` Avi Kivity
  2008-04-23 18:54                 ` Laurent Vivier
  0 siblings, 2 replies; 20+ messages in thread
From: Anthony Liguori @ 2008-04-23 16:48 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: kvm-devel, Avi Kivity

Laurent Vivier wrote:
> Le mercredi 23 avril 2008 à 19:25 +0300, Avi Kivity a écrit :
>   
>> Laurent Vivier wrote:
>>     
>>> Le mercredi 23 avril 2008 à 10:10 -0500, Anthony Liguori a écrit :
>>> [...]
>>>   
>>>       
>>>> The ne2k is pretty mmio heavy.  You should be able to observe a boost 
>>>> with something like iperf (guest=>host) I would think if this is a real 
>>>> savings.
>>>>     
>>>>         
>>> I like your advices :-D
>>>
>>> I use iperf with e1000 emulation and a slightly modified patch (to
>>> detect MMIO write in a loop), server is on the host, client on the
>>> guest, with default values.
>>>
>>> RESULT WITHOUT BATCHING:
>>>
>>> [  4]  0.0-10.0 sec    235 MBytes    197 Mbits/sec
>>> [  5]  0.0-10.0 sec    194 MBytes    163 Mbits/sec
>>> [  4]  0.0-10.0 sec    185 MBytes    155 Mbits/sec
>>> [  5]  0.0-10.0 sec    227 MBytes    190 Mbits/sec
>>> [  4]  0.0-10.0 sec    196 MBytes    164 Mbits/sec
>>> [  5]  0.0-10.0 sec    194 MBytes    163 Mbits/sec
>>> [  4]  0.0-10.0 sec    184 MBytes    154 Mbits/sec
>>>
>>> RESULT WITH BATCHING:
>>>
>>> ------------------------------------------------------------
>>> Server listening on TCP port 5001
>>> TCP window size: 85.3 KByte (default)
>>> ------------------------------------------------------------
>>> [  4]  0.0-10.0 sec    357 MBytes    299 Mbits/sec
>>> [  5]  0.0-10.1 sec    418 MBytes    347 Mbits/sec
>>> [  4]  0.0-10.0 sec    408 MBytes    342 Mbits/sec
>>> [  5]  0.0-10.0 sec    422 MBytes    353 Mbits/sec
>>> [  4]  0.0-10.1 sec    436 MBytes    362 Mbits/sec
>>> [  5]  0.0-10.0 sec    416 MBytes    348 Mbits/sec
>>> [  4]  0.0-10.0 sec    431 MBytes    361 Mbits/sec
>>>
>>> Well, it's nice ?
>>>   
>>>       
>> It's too good to be true.
>>
>> I think we're seeing two bugs cancel each other out, resulting in a 
>> performance gain.  Linux doesn't know how to queue outgoing packets, so 
>> it bangs on the mmio that starts the transmit after every packet.  mmio 
>> batching doesn't know that this mmio register is critical for latency, 
>> so it queues it up.  The result is that you get not just mmio 
>> batching, but also packet batching!  Which dramatically improves 
>> performance at the expense of latency.
>>     
>
> How can I check that ? How can I measure latency ?
>   

ping (from guest to host)

Regards,

Anthony Liguori

> Perhaps I can swap server and client between guest and host ?
>
>   
>> Sorry (if it's true :)
>>
>>     
>
> Thank you for your help,
> Laurent
>   


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 16:48               ` Anthony Liguori
@ 2008-04-23 16:51                 ` Avi Kivity
  2008-04-23 18:54                 ` Laurent Vivier
  1 sibling, 0 replies; 20+ messages in thread
From: Avi Kivity @ 2008-04-23 16:51 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel, Laurent Vivier

Anthony Liguori wrote:
>>
>> How can I check that ? How can I measure latency ?
>>   
>
> ping (from guest to host)
>

The guest will halt anyway, flushing its mmio queue.

Perhaps a ping while a background process spins, consuming all cpu.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] kvm: Batch writes to MMIO
  2008-04-23 13:07 ` [PATCH 1/2] kvm: " Laurent Vivier
  2008-04-23 13:07   ` [PATCH 2/2] kvm-userspace: " Laurent Vivier
  2008-04-23 14:31   ` [PATCH 1/2] kvm: " Avi Kivity
@ 2008-04-23 16:53   ` Anthony Liguori
  2 siblings, 0 replies; 20+ messages in thread
From: Anthony Liguori @ 2008-04-23 16:53 UTC (permalink / raw)
  To: Laurent Vivier; +Cc: kvm-devel, Avi Kivity

Laurent Vivier wrote:
> This patch is the kernel part of the "batch writes to MMIO" patch.
>
> When kernel has to send MMIO writes to userspace, it stores them
> in memory until it has to pass the hand to userspace for another
> reason. This avoids to have too many context switches on operations
> that can wait.
>
> WARNING: this breaks compatibility with old userspace part.
>
> Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net>
> ---
>  arch/x86/kvm/x86.c         |   21 +++++++++++++++++++++
>  include/asm-x86/kvm_host.h |    2 ++
>  include/linux/kvm.h        |   10 +++++++++-
>  virt/kvm/kvm_main.c        |    3 +++
>  4 files changed, 35 insertions(+), 1 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0ce5563..3881056 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2942,8 +2942,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  		kvm_x86_ops->decache_regs(vcpu);
>  	}
>  
> +batch:
>  	r = __vcpu_run(vcpu, kvm_run);
>  
> +	if (!r && vcpu->mmio_is_write &&
> +	    kvm_run->exit_reason == KVM_EXIT_MMIO &&
> +	    kvm_run->batch_count < KVM_MAX_BATCH) {
> +		struct kvm_batch *batch = vcpu->arch.batch_data;
> +		int i = kvm_run->batch_count++;
> +
> +		batch[i].phys_addr = vcpu->mmio_phys_addr;
> +		batch[i].len = vcpu->mmio_size;
> +		memcpy(batch[i].data, vcpu->mmio_data, batch[i].len);
> +
> +		goto batch;
> +	}
>   

I wonder if this is sufficient for dynticks enabled guests.  __vcpu_run 
could last a very long time.

Ignoring that for a moment, Avi's comment about having userspace tell 
you which addresses to batch is an important one.  MMIO writes may have 
side effects and the next vcpu_run may rely on those side effects.  For 
instance, MMIO based IRQ acknowledgement.

You need a white list not only for performance purposes but also for 
correctness.

Regards,

Anthony Liguori

-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 15:47           ` Anthony Liguori
@ 2008-04-23 17:13             ` Avi Kivity
  0 siblings, 0 replies; 20+ messages in thread
From: Avi Kivity @ 2008-04-23 17:13 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel, Laurent Vivier

Anthony Liguori wrote:
> Avi Kivity wrote:
>   
>> Anthony Liguori wrote:
>>     
>>> The ne2k is pretty mmio heavy.  You should be able to observe a boost 
>>> with something like iperf (guest=>host) I would think if this is a 
>>> real savings.
>>>
>>>       
>> If we're just improving ne2k, the complexity isn't worth it.  We have 
>> two better nics which are widely supported in guests.
>>     
>
> Sure, but the ne2k should be pretty close to an optimal synthetic 
> benchmark for batching.  If we don't see a big improvement with it, 
> we're probably not going to see a big improvement with it for anything.
>   

I disagree.  If there are other inefficiencies in ne2k, they will mask 
any performance improvement from batching.  So whatever result we get 
will be inconclusive.

I don't expect any massive performance improvement except for splash 
screens.  I'll be happy to settle for a few percent with e1000 or scsi 
or ide.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/2] Batch writes to MMIO
  2008-04-23 16:48               ` Anthony Liguori
  2008-04-23 16:51                 ` Avi Kivity
@ 2008-04-23 18:54                 ` Laurent Vivier
  1 sibling, 0 replies; 20+ messages in thread
From: Laurent Vivier @ 2008-04-23 18:54 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: kvm-devel, Avi Kivity


Le mercredi 23 avril 2008 à 11:48 -0500, Anthony Liguori a écrit :
> Laurent Vivier wrote:
> > Le mercredi 23 avril 2008 à 19:25 +0300, Avi Kivity a écrit :
> >   
> >> Laurent Vivier wrote:
> >>     
> >>> Le mercredi 23 avril 2008 à 10:10 -0500, Anthony Liguori a écrit :
> >>> [...]
> >>>   
> >>>       
> >>>> The ne2k is pretty mmio heavy.  You should be able to observe a boost 
> >>>> with something like iperf (guest=>host) I would think if this is a real 
> >>>> savings.
> >>>>     
> >>>>         
> >>> I like your advices :-D
> >>>
> >>> I use iperf with e1000 emulation and a slightly modified patch (to
> >>> detect MMIO write in a loop), server is on the host, client on the
> >>> guest, with default values.
> >>>
> >>> RESULT WITHOUT BATCHING:
> >>>
> >>> [  4]  0.0-10.0 sec    235 MBytes    197 Mbits/sec
> >>> [  5]  0.0-10.0 sec    194 MBytes    163 Mbits/sec
> >>> [  4]  0.0-10.0 sec    185 MBytes    155 Mbits/sec
> >>> [  5]  0.0-10.0 sec    227 MBytes    190 Mbits/sec
> >>> [  4]  0.0-10.0 sec    196 MBytes    164 Mbits/sec
> >>> [  5]  0.0-10.0 sec    194 MBytes    163 Mbits/sec
> >>> [  4]  0.0-10.0 sec    184 MBytes    154 Mbits/sec
> >>>
> >>> RESULT WITH BATCHING:
> >>>
> >>> ------------------------------------------------------------
> >>> Server listening on TCP port 5001
> >>> TCP window size: 85.3 KByte (default)
> >>> ------------------------------------------------------------
> >>> [  4]  0.0-10.0 sec    357 MBytes    299 Mbits/sec
> >>> [  5]  0.0-10.1 sec    418 MBytes    347 Mbits/sec
> >>> [  4]  0.0-10.0 sec    408 MBytes    342 Mbits/sec
> >>> [  5]  0.0-10.0 sec    422 MBytes    353 Mbits/sec
> >>> [  4]  0.0-10.1 sec    436 MBytes    362 Mbits/sec
> >>> [  5]  0.0-10.0 sec    416 MBytes    348 Mbits/sec
> >>> [  4]  0.0-10.0 sec    431 MBytes    361 Mbits/sec
> >>>
> >>> Well, it's nice ?
> >>>   
> >>>       
> >> It's too good to be true.
> >>
> >> I think we're seeing two bugs cancel each other out, resulting in a 
> >> performance gain.  Linux doesn't know how to queue outgoing packets, so 
> >> it bangs on the mmio that starts the transmit after every packet.  mmio 
> >> batching doesn't know that this mmio register is critical for latency, 
> >> so it queues it up.  The result is that you get not just mmio 
> >> batching, but also packet batching!  Which dramatically improves 
> >> performance at the expense of latency.
> >>     
> >
> > How can I check that ? How can I measure latency ?
> >   
> 
> ping (from guest to host)

I have 40 ms instead of 0.09 ms, so Avi you are right.

Laurent
-- 
------------- Laurent.Vivier@bull.net ---------------
"The best way to predict the future is to invent it."
- Alan Kay


-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2008-04-23 18:54 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-23 13:07 [PATCH 0/2] Batch writes to MMIO Laurent Vivier
2008-04-23 13:07 ` [PATCH 1/2] kvm: " Laurent Vivier
2008-04-23 13:07   ` [PATCH 2/2] kvm-userspace: " Laurent Vivier
2008-04-23 14:31   ` [PATCH 1/2] kvm: " Avi Kivity
2008-04-23 14:59     ` Laurent Vivier
2008-04-23 15:04       ` Avi Kivity
2008-04-23 16:53   ` Anthony Liguori
2008-04-23 14:05 ` [PATCH 0/2] " Avi Kivity
2008-04-23 14:22   ` Laurent Vivier
2008-04-23 14:40     ` Avi Kivity
2008-04-23 15:10       ` Anthony Liguori
2008-04-23 15:12         ` Avi Kivity
2008-04-23 15:47           ` Anthony Liguori
2008-04-23 17:13             ` Avi Kivity
2008-04-23 16:24         ` Laurent Vivier
2008-04-23 16:25           ` Avi Kivity
2008-04-23 16:41             ` Laurent Vivier
2008-04-23 16:48               ` Anthony Liguori
2008-04-23 16:51                 ` Avi Kivity
2008-04-23 18:54                 ` Laurent Vivier

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox