[PATCH 0/2] unit-tests: Add tests to measure svm instruction latencys

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/2] unit-tests: Add tests to measure svm instruction latencys
@ 2010-09-19 13:56 Joerg Roedel
  2010-09-19 13:56 ` [PATCH 1/2] svm: Add VMRUN/VMEXIT latency test Joerg Roedel
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Joerg Roedel @ 2010-09-19 13:56 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Alexander Graf

Hi,

here are two patches for the svm unit-test framework that measure the
latency of the emulated vmrun, vmload, and vmsave instructions as well
as the latency of emulated vmexits.

On my Phenom II X6 1090T running in P0 it produces output like this:

    Latency VMRUN : max: 140956 min: 6770 avg: 6819
    Latency VMEXIT: max: 141042 min: 7475 avg: 7575
latency_run_exit: PASS
    Latency VMLOAD: max: 23248 min: 1915 avg: 1925
    Latency VMSAVE: max: 130888 min: 1917 avg: 1955
latency_load_save: PASS

These two tests report the number of TSC cycles between events. Each latency
is measured one million times (which explains the large max values).

	Joerg

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] svm: Add VMRUN/VMEXIT latency test
  2010-09-19 13:56 [PATCH 0/2] unit-tests: Add tests to measure svm instruction latencys Joerg Roedel
@ 2010-09-19 13:56 ` Joerg Roedel
  2010-09-19 13:56 ` [PATCH 2/2] svm: Add VMLOAD/VMSAVE " Joerg Roedel
  2010-09-20 18:05 ` [PATCH 0/2] unit-tests: Add tests to measure svm instruction latencys Marcelo Tosatti
  2 siblings, 0 replies; 5+ messages in thread
From: Joerg Roedel @ 2010-09-19 13:56 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Alexander Graf, Joerg Roedel

This patch adds a test to measure the latency of VMRUN and
VMEXIT.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 x86/svm.c |   81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 81 insertions(+), 0 deletions(-)

diff --git a/x86/svm.c b/x86/svm.c
index dc3098f..babd77d 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -13,6 +13,18 @@ u64 *pde[4];
 u64 *pte[2048];
 u64 *scratch_page;
 
+#define LATENCY_RUNS 1000000
+
+u64 tsc_start;
+u64 tsc_end;
+
+u64 vmrun_sum, vmexit_sum;
+u64 latvmrun_max;
+u64 latvmrun_min;
+u64 latvmexit_max;
+u64 latvmexit_min;
+u64 runs;
+
 static bool npt_supported(void)
 {
    return cpuid(0x8000000A).d & 1;
@@ -162,6 +174,7 @@ static bool test_run(struct test *test, struct vmcb *vmcb)
     vmcb->save.rip = (ulong)test_thunk;
     vmcb->save.rsp = (ulong)(guest_stack + ARRAY_SIZE(guest_stack));
     do {
+        tsc_start = rdtsc();
         asm volatile (
             "clgi \n\t"
             "vmload \n\t"
@@ -176,9 +189,11 @@ static bool test_run(struct test *test, struct vmcb *vmcb)
             : "rbx", "rcx", "rdx", "rsi",
               "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15",
               "memory");
+	tsc_end = rdtsc();
         ++test->exits;
     } while (!test->finished(test));
 
+
     success = test->succeeded(test);
 
     printf("%s: %s\n", test->name, success ? "PASS" : "FAIL");
@@ -582,6 +597,70 @@ static bool npt_pfwalk_check(struct test *test)
 	   && (test->vmcb->control.exit_info_2 == read_cr3());
 }
 
+static void latency_prepare(struct test *test)
+{
+    default_prepare(test);
+    runs = LATENCY_RUNS;
+    latvmrun_min = latvmexit_min = -1ULL;
+    latvmrun_max = latvmexit_max = 0;
+    vmrun_sum = vmexit_sum = 0;
+}
+
+static void latency_test(struct test *test)
+{
+    u64 cycles;
+
+start:
+    tsc_end = rdtsc();
+
+    cycles = tsc_end - tsc_start;
+
+    if (cycles > latvmrun_max)
+        latvmrun_max = cycles;
+
+    if (cycles < latvmrun_min)
+        latvmrun_min = cycles;
+
+    vmrun_sum += cycles;
+
+    tsc_start = rdtsc();
+
+    asm volatile ("vmmcall" : : : "memory");
+    goto start;
+}
+
+static bool latency_finished(struct test *test)
+{
+    u64 cycles;
+
+    tsc_end = rdtsc();
+
+    cycles = tsc_end - tsc_start;
+
+    if (cycles > latvmexit_max)
+        latvmexit_max = cycles;
+
+    if (cycles < latvmexit_min)
+        latvmexit_min = cycles;
+
+    vmexit_sum += cycles;
+
+    test->vmcb->save.rip += 3;
+
+    runs -= 1;
+
+    return runs == 0;
+}
+
+static bool latency_check(struct test *test)
+{
+    printf("    Latency VMRUN : max: %d min: %d avg: %d\n", latvmrun_max,
+            latvmrun_min, vmrun_sum / LATENCY_RUNS);
+    printf("    Latency VMEXIT: max: %d min: %d avg: %d\n", latvmexit_max,
+            latvmexit_min, vmexit_sum / LATENCY_RUNS);
+    return true;
+}
+
 static struct test tests[] = {
     { "null", default_supported, default_prepare, null_test,
       default_finished, null_check },
@@ -614,6 +693,8 @@ static struct test tests[] = {
 	    default_finished, npt_rw_check },
     { "npt_pfwalk", npt_supported, npt_pfwalk_prepare, null_test,
 	    default_finished, npt_pfwalk_check },
+    { "latency_run_exit", default_supported, latency_prepare, latency_test,
+      latency_finished, latency_check },
 };
 
 int main(int ac, char **av)
-- 
1.7.0.4



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] svm: Add VMLOAD/VMSAVE latency test
  2010-09-19 13:56 [PATCH 0/2] unit-tests: Add tests to measure svm instruction latencys Joerg Roedel
  2010-09-19 13:56 ` [PATCH 1/2] svm: Add VMRUN/VMEXIT latency test Joerg Roedel
@ 2010-09-19 13:56 ` Joerg Roedel
  2010-09-19 15:07   ` Roedel, Joerg
  2010-09-20 18:05 ` [PATCH 0/2] unit-tests: Add tests to measure svm instruction latencys Marcelo Tosatti
  2 siblings, 1 reply; 5+ messages in thread
From: Joerg Roedel @ 2010-09-19 13:56 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm, Alexander Graf, Joerg Roedel

This patch adds a test to measure the latency of the VMLOAD
and VMSAVE instructions.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 x86/svm.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 52 insertions(+), 0 deletions(-)

diff --git a/x86/svm.c b/x86/svm.c
index babd77d..26bf566 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -19,10 +19,15 @@ u64 tsc_start;
 u64 tsc_end;
 
 u64 vmrun_sum, vmexit_sum;
+u64 vmsave_sum, vmload_sum;
 u64 latvmrun_max;
 u64 latvmrun_min;
 u64 latvmexit_max;
 u64 latvmexit_min;
+u64 latvmload_max;
+u64 latvmload_min;
+u64 latvmsave_max;
+u64 latvmsave_min;
 u64 runs;
 
 static bool npt_supported(void)
@@ -661,6 +666,51 @@ static bool latency_check(struct test *test)
     return true;
 }
 
+static void load_save_prepare(struct test *test)
+{
+    default_prepare(test);
+    runs = LATENCY_RUNS;
+    latvmload_min = latvmsave_min = -1ULL;
+    latvmload_max = latvmsave_max = 0;
+    vmload_sum = vmsave_sum = 0;
+}
+
+static bool load_save_finished(struct test *test)
+{
+    u64 vmcb_phys = virt_to_phys(test->vmcb);
+    u64 cycles;
+
+    for ( ; runs != 0; runs--) {
+        tsc_start = rdtsc();
+        asm volatile("vmload\n\t" : : "a"(vmcb_phys) : "memory");
+        cycles = rdtsc() - tsc_start;
+        if (cycles > latvmload_max)
+            latvmload_max = cycles;
+        if (cycles < latvmload_min)
+            latvmload_min = cycles;
+        vmload_sum += cycles;
+
+        tsc_start = rdtsc();
+        asm volatile("vmsave\n\t" : : "a"(vmcb_phys) : "memory");
+        cycles = rdtsc() - tsc_start;
+        if (cycles > latvmsave_max)
+            latvmsave_max = cycles;
+        if (cycles < latvmsave_min)
+            latvmsave_min = cycles;
+        vmsave_sum += cycles;
+    }
+
+    return true;
+}
+
+static bool load_save_check(struct test *test)
+{
+    printf("    Latency VMLOAD: max: %d min: %d avg: %d\n", latvmload_max,
+            latvmload_min, vmload_sum / LATENCY_RUNS);
+    printf("    Latency VMSAVE: max: %d min: %d avg: %d\n", latvmsave_max,
+            latvmsave_min, vmsave_sum / LATENCY_RUNS);
+    return true;
+}
 static struct test tests[] = {
     { "null", default_supported, default_prepare, null_test,
       default_finished, null_check },
@@ -695,6 +745,8 @@ static struct test tests[] = {
 	    default_finished, npt_pfwalk_check },
     { "latency_run_exit", default_supported, latency_prepare, latency_test,
       latency_finished, latency_check },
+    { "latency_load_save", default_supported, load_save_prepare, null_test,
+      load_save_finished, load_save_check },
 };
 
 int main(int ac, char **av)
-- 
1.7.0.4



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] svm: Add VMLOAD/VMSAVE latency test
  2010-09-19 13:56 ` [PATCH 2/2] svm: Add VMLOAD/VMSAVE " Joerg Roedel
@ 2010-09-19 15:07   ` Roedel, Joerg
  0 siblings, 0 replies; 5+ messages in thread
From: Roedel, Joerg @ 2010-09-19 15:07 UTC (permalink / raw)
  To: Avi Kivity, Marcelo Tosatti; +Cc: kvm@vger.kernel.org, Alexander Graf

I just figured out that it makes a lot of sense to measure the latency of
STGI and CLGI in this test as well. Here is an updated patch.

>From 350d33d155db8928f81cc801587787c84d6bc8c5 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Sun, 19 Sep 2010 15:34:37 +0200
Subject: [PATCH] svm: Add VMLOAD/VMSAVE and STGI/CLGI latency test

This patch adds a test to measure the latency of the VMLOAD
and VMSAVE instructions as well as for STGI and CLGI.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 x86/svm.c |   79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 79 insertions(+), 0 deletions(-)

diff --git a/x86/svm.c b/x86/svm.c
index babd77d..ed784c4 100644
--- a/x86/svm.c
+++ b/x86/svm.c
@@ -19,10 +19,20 @@ u64 tsc_start;
 u64 tsc_end;
 
 u64 vmrun_sum, vmexit_sum;
+u64 vmsave_sum, vmload_sum;
+u64 stgi_sum, clgi_sum;
 u64 latvmrun_max;
 u64 latvmrun_min;
 u64 latvmexit_max;
 u64 latvmexit_min;
+u64 latvmload_max;
+u64 latvmload_min;
+u64 latvmsave_max;
+u64 latvmsave_min;
+u64 latstgi_max;
+u64 latstgi_min;
+u64 latclgi_max;
+u64 latclgi_min;
 u64 runs;
 
 static bool npt_supported(void)
@@ -661,6 +671,73 @@ static bool latency_check(struct test *test)
     return true;
 }
 
+static void lat_svm_insn_prepare(struct test *test) /* reset VMLOAD/VMSAVE/STGI/CLGI latency stats */
+{
+    default_prepare(test);
+    runs = LATENCY_RUNS; /* iterations left for the measurement loop */
+    latvmload_min = latvmsave_min = latstgi_min = latclgi_min = -1ULL; /* u64 max: first sample always becomes the minimum */
+    latvmload_max = latvmsave_max = latstgi_max = latclgi_max = 0;
+    vmload_sum = vmsave_sum = stgi_sum = clgi_sum = 0; /* explicitly zero every accumulator */
+}
+
+static bool lat_svm_insn_finished(struct test *test)
+{
+    u64 vmcb_phys = virt_to_phys(test->vmcb);
+    u64 cycles;
+
+    for ( ; runs != 0; runs--) {
+        tsc_start = rdtsc();
+        asm volatile("vmload\n\t" : : "a"(vmcb_phys) : "memory");
+        cycles = rdtsc() - tsc_start;
+        if (cycles > latvmload_max)
+            latvmload_max = cycles;
+        if (cycles < latvmload_min)
+            latvmload_min = cycles;
+        vmload_sum += cycles;
+
+        tsc_start = rdtsc();
+        asm volatile("vmsave\n\t" : : "a"(vmcb_phys) : "memory");
+        cycles = rdtsc() - tsc_start;
+        if (cycles > latvmsave_max)
+            latvmsave_max = cycles;
+        if (cycles < latvmsave_min)
+            latvmsave_min = cycles;
+        vmsave_sum += cycles;
+
+        tsc_start = rdtsc();
+        asm volatile("stgi\n\t");
+        cycles = rdtsc() - tsc_start;
+        if (cycles > latstgi_max)
+            latstgi_max = cycles;
+        if (cycles < latstgi_min)
+            latstgi_min = cycles;
+        stgi_sum += cycles;
+
+        tsc_start = rdtsc();
+        asm volatile("clgi\n\t");
+        cycles = rdtsc() - tsc_start;
+        if (cycles > latclgi_max)
+            latclgi_max = cycles;
+        if (cycles < latclgi_min)
+            latclgi_min = cycles;
+        clgi_sum += cycles;
+    }
+
+    return true;
+}
+
+static bool lat_svm_insn_check(struct test *test)
+{
+    printf("    Latency VMLOAD: max: %d min: %d avg: %d\n", latvmload_max,
+            latvmload_min, vmload_sum / LATENCY_RUNS);
+    printf("    Latency VMSAVE: max: %d min: %d avg: %d\n", latvmsave_max,
+            latvmsave_min, vmsave_sum / LATENCY_RUNS);
+    printf("    Latency STGI:   max: %d min: %d avg: %d\n", latstgi_max,
+            latstgi_min, stgi_sum / LATENCY_RUNS);
+    printf("    Latency CLGI:   max: %d min: %d avg: %d\n", latclgi_max,
+            latclgi_min, clgi_sum / LATENCY_RUNS);
+    return true;
+}
 static struct test tests[] = {
     { "null", default_supported, default_prepare, null_test,
       default_finished, null_check },
@@ -695,6 +772,8 @@ static struct test tests[] = {
 	    default_finished, npt_pfwalk_check },
     { "latency_run_exit", default_supported, latency_prepare, latency_test,
       latency_finished, latency_check },
+    { "latency_svm_insn", default_supported, lat_svm_insn_prepare, null_test,
+      lat_svm_insn_finished, lat_svm_insn_check },
 };
 
 int main(int ac, char **av)
-- 
1.7.0.4


-- 
AMD Operating System Research Center

Advanced Micro Devices GmbH Einsteinring 24 85609 Dornach
General Managers: Alberto Bozzo, Andrew Bowd
Registration: Dornach, Landkr. Muenchen; Registerger. Muenchen, HRB Nr. 43632


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 0/2] unit-tests: Add tests to measure svm instruction latencys
  2010-09-19 13:56 [PATCH 0/2] unit-tests: Add tests to measure svm instruction latencys Joerg Roedel
  2010-09-19 13:56 ` [PATCH 1/2] svm: Add VMRUN/VMEXIT latency test Joerg Roedel
  2010-09-19 13:56 ` [PATCH 2/2] svm: Add VMLOAD/VMSAVE " Joerg Roedel
@ 2010-09-20 18:05 ` Marcelo Tosatti
  2 siblings, 0 replies; 5+ messages in thread
From: Marcelo Tosatti @ 2010-09-20 18:05 UTC (permalink / raw)
  To: Joerg Roedel; +Cc: Avi Kivity, kvm, Alexander Graf

On Sun, Sep 19, 2010 at 03:56:49PM +0200, Joerg Roedel wrote:
> Hi,
> 
> here are two patches for the svm unit-test framework that measure the
> latency of the emulated vmrun, vmload, and vmsave instructions as well
> as the latency of emulated vmexits.
> 
> On my Phenom II X6 1090T running in P0 it produces output like this:
> 
>     Latency VMRUN : max: 140956 min: 6770 avg: 6819
>     Latency VMEXIT: max: 141042 min: 7475 avg: 7575
> latency_run_exit: PASS
>     Latency VMLOAD: max: 23248 min: 1915 avg: 1925
>     Latency VMSAVE: max: 130888 min: 1917 avg: 1955
> latency_load_save: PASS
> 
> These two tests report the tsc-cycles between events. Each latency is
> measured one million times (which explains the large -max values).
> 
> 	Joerg

Applied, thanks. 


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-09-20 19:11 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2010-09-19 13:56 [PATCH 0/2] unit-tests: Add tests to measure svm instruction latencys Joerg Roedel
2010-09-19 13:56 ` [PATCH 1/2] svm: Add VMRUN/VMEXIT latency test Joerg Roedel
2010-09-19 13:56 ` [PATCH 2/2] svm: Add VMLOAD/VMSAVE " Joerg Roedel
2010-09-19 15:07   ` Roedel, Joerg
2010-09-20 18:05 ` [PATCH 0/2] unit-tests: Add tests to measure svm instruction latencys Marcelo Tosatti

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox