* [PATCH kvm-unit-tests v2] vmexit: time the number of cycles for simple PIO
@ 2012-12-13 12:40 Paolo Bonzini
2012-12-18 10:56 ` Gleb Natapov
0 siblings, 1 reply; 2+ messages in thread
From: Paolo Bonzini @ 2012-12-13 12:40 UTC (permalink / raw)
To: kvm; +Cc: mtosatti, gleb
This patch adds three scenarios to the vmexit test. Two are very simple
PIO cases that are handled in the kernel (reading from and writing
to ELCR). The other is an unmapped PIO that is handled in userspace.
The difference between the two reading scenarios is roughly the cost of a
userspace exit; the existing inl_from_pmtimer test is not precise enough,
because the device model has a pretty high cost.
The difference between the kernel read and write is the cost of emulation,
because inl_from_kernel goes through the full instruction emulator while
outl_to_kernel does not (out is used for virtio, while the speed of inl matters less).
Example:
vmcall 3898
inl_from_pmtimer 24615
inl_from_qemu 20574
inl_from_kernel 7237
outl_to_kernel 4451
So the cost of exiting to userspace is 13000 cycles on this machine,
and the cost of emulation is 3300 cycles.
Suggested-by: Avi Kivity <avi.kivity@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
x86/vmexit.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/x86/vmexit.c b/x86/vmexit.c
index ad8ab55..98f0ead 100644
--- a/x86/vmexit.c
+++ b/x86/vmexit.c
@@ -4,6 +4,18 @@
#include "processor.h"
#include "atomic.h"
+static void outb(unsigned short port, int val) /* write the low byte of val to I/O port `port` */
+{
+ asm volatile("outb %b0, %w1" : : "a"(val), "Nd"(port)); /* val is an INPUT operand: it must be loaded into %al before outb executes */
+}
+
+static unsigned int inb(unsigned short port) /* read one byte from I/O port `port` */
+{
+ unsigned int val;
+ asm volatile("xorl %0, %0; inb %w1, %b0" : "=a"(val) : "Nd"(port)); /* xorl zeroes the whole register first, so the byte read into its low 8 bits is returned zero-extended */
+ return val;
+}
+
static unsigned int inl(unsigned short port)
{
unsigned int val;
@@ -82,6 +94,21 @@ static void inl_pmtimer(void)
inl(0xb008);
}
+static void inl_nop_qemu(void) /* timed scenario "inl_from_qemu": one 32-bit port read, result discarded */
+{
+ inl(0x1234); /* per the patch description, an unmapped port whose access is handled in userspace */
+}
+
+static void inl_nop_kernel(void) /* timed scenario "inl_from_kernel": one port read, result discarded */
+{
+ inb(0x4d0); /* byte-wide read despite the "inl" name; per the patch description, an ELCR read handled in the kernel */
+}
+
+static void outl_elcr_kernel(void) /* timed scenario "outl_to_kernel": one port write */
+{
+ outb(0x4d0, 0); /* byte-wide write of 0 despite the "outl" name; per the patch description, an ELCR write handled in the kernel */
+}
+
static void ple_round_robin(void)
{
struct counter {
@@ -116,6 +143,9 @@ static struct test {
{ mov_to_cr8, "mov_to_cr8" , .parallel = 1, },
#endif
{ inl_pmtimer, "inl_from_pmtimer", .parallel = 1, },
+ { inl_nop_qemu, "inl_from_qemu", .parallel = 1 },
+ { inl_nop_kernel, "inl_from_kernel", .parallel = 1 },
+ { outl_elcr_kernel, "outl_to_kernel", .parallel = 1 },
{ ipi, "ipi", is_smp, .parallel = 0, },
{ ipi_halt, "ipi+halt", is_smp, .parallel = 0, },
{ ple_round_robin, "ple-round-robin", .parallel = 1 },
--
1.8.0.2
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH kvm-unit-tests v2] vmexit: time the number of cycles for simple PIO
2012-12-13 12:40 [PATCH kvm-unit-tests v2] vmexit: time the number of cycles for simple PIO Paolo Bonzini
@ 2012-12-18 10:56 ` Gleb Natapov
0 siblings, 0 replies; 2+ messages in thread
From: Gleb Natapov @ 2012-12-18 10:56 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: kvm, mtosatti
On Thu, Dec 13, 2012 at 01:40:37PM +0100, Paolo Bonzini wrote:
> This patch adds three scenarios to the vmexit test. Two are very simple
> PIO cases that are handled in the kernel (reading from and writing
> to ELCR). The other is an unmapped PIO that is handled in userspace.
>
> The difference between the two reading scenarios is roughly the cost of a
> userspace exit; the existing inl_from_pmtimer test is not precise enough,
> because the device model has a pretty high cost.
>
> The difference between the kernel read and write is the cost of emulation,
> because inl_from_kernel goes through the whole emulation stuff while outl
> does not (it is used for virtio, while the speed of inl matters less).
>
> Example:
>
> vmcall 3898
> inl_from_pmtimer 24615
> inl_from_qemu 20574
> inl_from_kernel 7237
> outl_to_kernel 4451
>
> So the cost of exiting to userspace is 13000 cycles on this machine,
> and the cost of emulation is 3300 cycles.
>
> Suggested-by: Avi Kivity <avi.kivity@gmail.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Thanks, applied. On my machine out is faster than vmcall.
vmcall 2681
outl_to_kernel 2358
> ---
> x86/vmexit.c | 30 ++++++++++++++++++++++++++++++
> 1 file changed, 30 insertions(+)
>
> diff --git a/x86/vmexit.c b/x86/vmexit.c
> index ad8ab55..98f0ead 100644
> --- a/x86/vmexit.c
> +++ b/x86/vmexit.c
> @@ -4,6 +4,18 @@
> #include "processor.h"
> #include "atomic.h"
>
> +static void outb(unsigned short port, int val)
> +{
> + asm volatile("outb %b0, %w1" : "=a"(val) : "Nd"(port));
> +}
> +
> +static unsigned int inb(unsigned short port)
> +{
> + unsigned int val;
> + asm volatile("xorl %0, %0; inb %w1, %b0" : "=a"(val) : "Nd"(port));
> + return val;
> +}
> +
> static unsigned int inl(unsigned short port)
> {
> unsigned int val;
> @@ -82,6 +94,21 @@ static void inl_pmtimer(void)
> inl(0xb008);
> }
>
> +static void inl_nop_qemu(void)
> +{
> + inl(0x1234);
> +}
> +
> +static void inl_nop_kernel(void)
> +{
> + inb(0x4d0);
> +}
> +
> +static void outl_elcr_kernel(void)
> +{
> + outb(0x4d0, 0);
> +}
> +
> static void ple_round_robin(void)
> {
> struct counter {
> @@ -116,6 +143,9 @@ static struct test {
> { mov_to_cr8, "mov_to_cr8" , .parallel = 1, },
> #endif
> { inl_pmtimer, "inl_from_pmtimer", .parallel = 1, },
> + { inl_nop_qemu, "inl_from_qemu", .parallel = 1 },
> + { inl_nop_kernel, "inl_from_kernel", .parallel = 1 },
> + { outl_elcr_kernel, "outl_to_kernel", .parallel = 1 },
> { ipi, "ipi", is_smp, .parallel = 0, },
> { ipi_halt, "ipi+halt", is_smp, .parallel = 0, },
> { ple_round_robin, "ple-round-robin", .parallel = 1 },
> --
> 1.8.0.2
--
Gleb.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2012-12-18 10:56 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2012-12-13 12:40 [PATCH kvm-unit-tests v2] vmexit: time the number of cycles for simple PIO Paolo Bonzini
2012-12-18 10:56 ` Gleb Natapov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox