kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
@ 2013-06-06 15:24 Arthur Chunqi Li
  2013-06-06 15:24 ` [PATCH 2/2] kvm-unit-tests: Change two cases to use trap_emulator Arthur Chunqi Li
                   ` (2 more replies)
  0 siblings, 3 replies; 54+ messages in thread
From: Arthur Chunqi Li @ 2013-06-06 15:24 UTC (permalink / raw)
  To: kvm; +Cc: gleb, pbonzini, Arthur Chunqi Li

Add a function, trap_emulator, to run an instruction in the emulator.
Set inregs first (%rax cannot be set because it is used as the return
address), put the instruction encoding in alt_insn, and call the function
with alt_insn_length. The results are returned in outregs.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/x86/emulator.c b/x86/emulator.c
index 96576e5..8ab9904 100644
--- a/x86/emulator.c
+++ b/x86/emulator.c
@@ -11,6 +11,14 @@ int fails, tests;
 
 static int exceptions;
 
+struct regs {
+	u64 rax, rbx, rcx, rdx;
+	u64 rsi, rdi, rsp, rbp;
+	u64 rip, rflags;
+};
+
+static struct regs inregs, outregs;
+
 void report(const char *name, int result)
 {
 	++tests;
@@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
     report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
 }
 
+static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
+			     uint8_t *alt_insn_page, void *insn_ram,
+			     uint8_t *alt_insn, int alt_insn_length)
+{
+	ulong *cr3 = (ulong *)read_cr3();
+	int i;
+
+	// Pad with RET instructions
+	memset(insn_page, 0xc3, 4096);
+	memset(alt_insn_page, 0xc3, 4096);
+
+	// Place a trapping instruction in the page to trigger a VMEXIT
+	insn_page[0] = 0x89; // mov %eax, (%rax)
+	insn_page[1] = 0x00;
+	insn_page[2] = 0x90; // nop
+	insn_page[3] = 0xc3; // ret
+
+	// Place the instruction we want the hypervisor to see in the alternate page
+	for (i=0; i<alt_insn_length; i++)
+		alt_insn_page[i] = alt_insn[i];
+
+	// Save general registers
+	asm volatile(
+		"push %rax\n\r"
+		"push %rbx\n\r"
+		"push %rcx\n\r"
+		"push %rdx\n\r"
+		"push %rsi\n\r"
+		"push %rdi\n\r"
+		);
+	// Load the code TLB with insn_page, but point the page tables at
+	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
+	// This will make the CPU trap on the insn_page instruction but the
+	// hypervisor will see alt_insn_page.
+	install_page(cr3, virt_to_phys(insn_page), insn_ram);
+	invlpg(insn_ram);
+	// Load code TLB
+	asm volatile("call *%0" : : "r"(insn_ram + 3));
+	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
+	// Trap, let hypervisor emulate at alt_insn_page
+	asm volatile(
+		"call *%1\n\r"
+
+		"mov %%rax, 0+%[outregs] \n\t"
+		"mov %%rbx, 8+%[outregs] \n\t"
+		"mov %%rcx, 16+%[outregs] \n\t"
+		"mov %%rdx, 24+%[outregs] \n\t"
+		"mov %%rsi, 32+%[outregs] \n\t"
+		"mov %%rdi, 40+%[outregs] \n\t"
+		"mov %%rsp,48+ %[outregs] \n\t"
+		"mov %%rbp, 56+%[outregs] \n\t"
+
+		/* Save RFLAGS in outregs*/
+		"pushf \n\t"
+		"popq 72+%[outregs] \n\t"
+		: [outregs]"+m"(outregs)
+		: "r"(insn_ram),
+			"a"(mem), "b"(inregs.rbx),
+			"c"(inregs.rcx), "d"(inregs.rdx),
+			"S"(inregs.rsi), "D"(inregs.rdi)
+		: "memory", "cc"
+		);
+	// Restore general registers
+	asm volatile(
+		"pop %rax\n\r"
+		"pop %rbx\n\r"
+		"pop %rcx\n\r"
+		"pop %rdx\n\r"
+		"pop %rsi\n\r"
+		"pop %rdi\n\r"
+		);
+}
+
 static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
 {
     ++exceptions;
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 54+ messages in thread

* [PATCH 2/2] kvm-unit-tests: Change two cases to use trap_emulator
  2013-06-06 15:24 [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator Arthur Chunqi Li
@ 2013-06-06 15:24 ` Arthur Chunqi Li
  2013-06-12 20:51   ` Paolo Bonzini
  2013-06-07  2:14 ` [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator 李春奇 <Arthur Chunqi Li>
  2013-06-12 20:50 ` Paolo Bonzini
  2 siblings, 1 reply; 54+ messages in thread
From: Arthur Chunqi Li @ 2013-06-06 15:24 UTC (permalink / raw)
  To: kvm; +Cc: gleb, pbonzini, Arthur Chunqi Li

Change two functions (test_mmx_movq_mf and test_movabs) to use the
unified trap_emulator.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 x86/emulator.c |   66 ++++++++++++--------------------------------------------
 1 file changed, 14 insertions(+), 52 deletions(-)

diff --git a/x86/emulator.c b/x86/emulator.c
index 8ab9904..fa8993f 100644
--- a/x86/emulator.c
+++ b/x86/emulator.c
@@ -776,72 +776,34 @@ static void test_mmx_movq_mf(uint64_t *mem, uint8_t *insn_page,
 			     uint8_t *alt_insn_page, void *insn_ram)
 {
     uint16_t fcw = 0;  // all exceptions unmasked
-    ulong *cr3 = (ulong *)read_cr3();
+    uint8_t alt_insn[] = {0x0f, 0x7f, 0x00}; // movq %mm0, (%rax)
 
     write_cr0(read_cr0() & ~6);  // TS, EM
-    // Place a trapping instruction in the page to trigger a VMEXIT
-    insn_page[0] = 0x89; // mov %eax, (%rax)
-    insn_page[1] = 0x00;
-    insn_page[2] = 0x90; // nop
-    insn_page[3] = 0xc3; // ret
-    // Place the instruction we want the hypervisor to see in the alternate page
-    alt_insn_page[0] = 0x0f; // movq %mm0, (%rax)
-    alt_insn_page[1] = 0x7f;
-    alt_insn_page[2] = 0x00;
-    alt_insn_page[3] = 0xc3; // ret
-
     exceptions = 0;
     handle_exception(MF_VECTOR, advance_rip_by_3_and_note_exception);
-
-    // Load the code TLB with insn_page, but point the page tables at
-    // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
-    // This will make the CPU trap on the insn_page instruction but the
-    // hypervisor will see alt_insn_page.
-    install_page(cr3, virt_to_phys(insn_page), insn_ram);
     asm volatile("fninit; fldcw %0" : : "m"(fcw));
     asm volatile("fldz; fldz; fdivp"); // generate exception
-    invlpg(insn_ram);
-    // Load code TLB
-    asm volatile("call *%0" : : "r"(insn_ram + 3));
-    install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
-    // Trap, let hypervisor emulate at alt_insn_page
-    asm volatile("call *%0" : : "r"(insn_ram), "a"(mem));
+
+    inregs = (struct regs){ 0 };
+    trap_emulator(mem, insn_page, alt_insn_page, insn_ram, 
+				alt_insn, 3);
     // exit MMX mode
     asm volatile("fnclex; emms");
-    report("movq mmx generates #MF", exceptions == 1);
+    report("movq mmx generates #MF2", exceptions == 1);
     handle_exception(MF_VECTOR, 0);
 }
 
 static void test_movabs(uint64_t *mem, uint8_t *insn_page,
 		       uint8_t *alt_insn_page, void *insn_ram)
 {
-    uint64_t val = 0;
-    ulong *cr3 = (ulong *)read_cr3();
-
-    // Pad with RET instructions
-    memset(insn_page, 0xc3, 4096);
-    memset(alt_insn_page, 0xc3, 4096);
-    // Place a trapping instruction in the page to trigger a VMEXIT
-    insn_page[0] = 0x89; // mov %eax, (%rax)
-    insn_page[1] = 0x00;
-    // Place the instruction we want the hypervisor to see in the alternate
-    // page. A buggy hypervisor will fetch a 32-bit immediate and return
-    // 0xffffffffc3c3c3c3.
-    alt_insn_page[0] = 0x48; // mov $0xc3c3c3c3c3c3c3c3, %rcx
-    alt_insn_page[1] = 0xb9;
-
-    // Load the code TLB with insn_page, but point the page tables at
-    // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
-    // This will make the CPU trap on the insn_page instruction but the
-    // hypervisor will see alt_insn_page.
-    install_page(cr3, virt_to_phys(insn_page), insn_ram);
-    // Load code TLB
-    invlpg(insn_ram);
-    asm volatile("call *%0" : : "r"(insn_ram + 3));
-    // Trap, let hypervisor emulate at alt_insn_page
-    install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
-    asm volatile("call *%1" : "=c"(val) : "r"(insn_ram), "a"(mem), "c"(0));
-    report("64-bit mov imm", val == 0xc3c3c3c3c3c3c3c3);
+    // mov $0xc3c3c3c3c3c3c3c3, %rcx
+    uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
+					0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
+    inregs = (struct regs){ .rcx = 0 };
+    
+    trap_emulator(mem, insn_page, alt_insn_page, insn_ram,
+				alt_insn, 10);
+    report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
 }
 
 static void test_crosspage_mmio(volatile uint8_t *mem)
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-06 15:24 [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator Arthur Chunqi Li
  2013-06-06 15:24 ` [PATCH 2/2] kvm-unit-tests: Change two cases to use trap_emulator Arthur Chunqi Li
@ 2013-06-07  2:14 ` 李春奇 <Arthur Chunqi Li>
  2013-06-12 20:50 ` Paolo Bonzini
  2 siblings, 0 replies; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-07  2:14 UTC (permalink / raw)
  To: kvm; +Cc: Gleb Natapov, Paolo Bonzini, Arthur Chunqi Li

This way of saving/restoring the general registers seems a bit too ugly;
I will change it and send another patch.

Some of the registers cannot be set the way realmode.c does: for example,
%rax is used to save the return value, a wrong %esp or %ebp may cause a
crash, and I think a changed %rflags may cause some unknown error. So these
registers should not be set by the caller.

Arthur

On Thu, Jun 6, 2013 at 11:24 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax is invalid because it is used as return
> address), put instruction codec in alt_insn and call func with
> alt_insn_length. Get results in outregs.
>
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 81 insertions(+)
>
> diff --git a/x86/emulator.c b/x86/emulator.c
> index 96576e5..8ab9904 100644
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -11,6 +11,14 @@ int fails, tests;
>
>  static int exceptions;
>
> +struct regs {
> +       u64 rax, rbx, rcx, rdx;
> +       u64 rsi, rdi, rsp, rbp;
> +       u64 rip, rflags;
> +};
> +
> +static struct regs inregs, outregs;
> +
>  void report(const char *name, int result)
>  {
>         ++tests;
> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>  }
>
> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> +                            uint8_t *alt_insn_page, void *insn_ram,
> +                            uint8_t *alt_insn, int alt_insn_length)
> +{
> +       ulong *cr3 = (ulong *)read_cr3();
> +       int i;
> +
> +       // Pad with RET instructions
> +       memset(insn_page, 0xc3, 4096);
> +       memset(alt_insn_page, 0xc3, 4096);
> +
> +       // Place a trapping instruction in the page to trigger a VMEXIT
> +       insn_page[0] = 0x89; // mov %eax, (%rax)
> +       insn_page[1] = 0x00;
> +       insn_page[2] = 0x90; // nop
> +       insn_page[3] = 0xc3; // ret
> +
> +       // Place the instruction we want the hypervisor to see in the alternate page
> +       for (i=0; i<alt_insn_length; i++)
> +               alt_insn_page[i] = alt_insn[i];
> +
> +       // Save general registers
> +       asm volatile(
> +               "push %rax\n\r"
> +               "push %rbx\n\r"
> +               "push %rcx\n\r"
> +               "push %rdx\n\r"
> +               "push %rsi\n\r"
> +               "push %rdi\n\r"
> +               );
> +       // Load the code TLB with insn_page, but point the page tables at
> +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> +       // This will make the CPU trap on the insn_page instruction but the
> +       // hypervisor will see alt_insn_page.
> +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
> +       invlpg(insn_ram);
> +       // Load code TLB
> +       asm volatile("call *%0" : : "r"(insn_ram + 3));
> +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> +       // Trap, let hypervisor emulate at alt_insn_page
> +       asm volatile(
> +               "call *%1\n\r"
> +
> +               "mov %%rax, 0+%[outregs] \n\t"
> +               "mov %%rbx, 8+%[outregs] \n\t"
> +               "mov %%rcx, 16+%[outregs] \n\t"
> +               "mov %%rdx, 24+%[outregs] \n\t"
> +               "mov %%rsi, 32+%[outregs] \n\t"
> +               "mov %%rdi, 40+%[outregs] \n\t"
> +               "mov %%rsp,48+ %[outregs] \n\t"
> +               "mov %%rbp, 56+%[outregs] \n\t"
> +
> +               /* Save RFLAGS in outregs*/
> +               "pushf \n\t"
> +               "popq 72+%[outregs] \n\t"
> +               : [outregs]"+m"(outregs)
> +               : "r"(insn_ram),
> +                       "a"(mem), "b"(inregs.rbx),
> +                       "c"(inregs.rcx), "d"(inregs.rdx),
> +                       "S"(inregs.rsi), "D"(inregs.rdi)
> +               : "memory", "cc"
> +               );
> +       // Restore general registers
> +       asm volatile(
> +               "pop %rax\n\r"
> +               "pop %rbx\n\r"
> +               "pop %rcx\n\r"
> +               "pop %rdx\n\r"
> +               "pop %rsi\n\r"
> +               "pop %rdi\n\r"
> +               );
> +}
> +
>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>  {
>      ++exceptions;
> --
> 1.7.9.5
>



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
@ 2013-06-07  2:31 Arthur Chunqi Li
  2013-06-09 11:07 ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: Arthur Chunqi Li @ 2013-06-07  2:31 UTC (permalink / raw)
  To: kvm; +Cc: gleb, pbonzini, Arthur Chunqi Li

Add a function, trap_emulator, to run an instruction in the emulator.
Set inregs first (%rax, %rsp, %rbp and %rflags have special uses and
cannot be set in inregs), put the instruction encoding in alt_insn, and
call the function with alt_insn_length. The results are returned in outregs.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 x86/emulator.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 mode change 100644 => 100755 x86/emulator.c

diff --git a/x86/emulator.c b/x86/emulator.c
old mode 100644
new mode 100755
index 96576e5..770e8f7
--- a/x86/emulator.c
+++ b/x86/emulator.c
@@ -11,6 +11,13 @@ int fails, tests;
 
 static int exceptions;
 
+struct regs {
+	u64 rax, rbx, rcx, rdx;
+	u64 rsi, rdi, rsp, rbp;
+	u64 rip, rflags;
+};
+static struct regs inregs, outregs;
+
 void report(const char *name, int result)
 {
 	++tests;
@@ -685,6 +692,66 @@ static void test_shld_shrd(u32 *mem)
     report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
 }
 
+static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
+			     uint8_t *alt_insn_page, void *insn_ram,
+			     uint8_t *alt_insn, int alt_insn_length)
+{
+	ulong *cr3 = (ulong *)read_cr3();
+	int i;
+	static struct regs save;
+
+	// Pad with RET instructions
+	memset(insn_page, 0xc3, 4096);
+	memset(alt_insn_page, 0xc3, 4096);
+
+	// Place a trapping instruction in the page to trigger a VMEXIT
+	insn_page[0] = 0x89; // mov %eax, (%rax)
+	insn_page[1] = 0x00;
+	insn_page[2] = 0x90; // nop
+	insn_page[3] = 0xc3; // ret
+
+	// Place the instruction we want the hypervisor to see in the alternate page
+	for (i=0; i<alt_insn_length; i++)
+		alt_insn_page[i] = alt_insn[i];
+	save = inregs;
+	
+	// Load the code TLB with insn_page, but point the page tables at
+	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
+	// This will make the CPU trap on the insn_page instruction but the
+	// hypervisor will see alt_insn_page.
+	install_page(cr3, virt_to_phys(insn_page), insn_ram);
+	invlpg(insn_ram);
+	// Load code TLB
+	asm volatile("call *%0" : : "r"(insn_ram + 3));
+	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
+	// Trap, let hypervisor emulate at alt_insn_page
+	asm volatile(
+		"xchg %%rbx, 8+%[save] \n\t"
+		"xchg %%rcx, 16+%[save] \n\t"
+		"xchg %%rdx, 24+%[save] \n\t"
+		"xchg %%rsi, 32+%[save] \n\t"
+		"xchg %%rdi, 40+%[save] \n\t"
+
+		"call *%1\n\t"
+
+		"mov %%rax, 0+%[save] \n\t"
+		"xchg %%rbx, 8+%[save] \n\t"
+		"xchg %%rcx, 16+%[save] \n\t"
+		"xchg %%rdx, 24+%[save] \n\t"
+		"xchg %%rsi, 32+%[save] \n\t"
+		"xchg %%rdi, 40+%[save] \n\t"
+		"mov %%rsp, 48+%[save] \n\t"
+		"mov %%rbp, 56+%[save] \n\t"
+		/* Save RFLAGS in outregs*/
+		"pushf \n\t"
+		"popq 72+%[save] \n\t"
+		: [save]"+m"(save)
+		: "r"(insn_ram), "a"(mem)
+		: "memory", "cc"
+		);
+	outregs = save;
+}
+
 static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
 {
     ++exceptions;
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-07  2:31 Arthur Chunqi Li
@ 2013-06-09 11:07 ` Gleb Natapov
  2013-06-09 12:44   ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-09 11:07 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, pbonzini

On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
> cannot set in inregs), put instruction codec in alt_insn and call
> func with alt_insn_length. Get results in outregs.
> 
Why can't %rax, %rsp, %rbp and %rflags be set in inregs?

%rax because the trapping instruction uses it? Use one that does not use
a register at all: MOV r/m32, imm32

%rsp and %rbp because of the ret on the instruction page? Use the same trick
the realmode.c test uses: have the code that sets/saves registers in
insn_page/alt_insn_page itself and copy the instruction you want to test
into the page itself instead of doing a call.

Not sure what is so special about %rflags.

> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/emulator.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 67 insertions(+)
>  mode change 100644 => 100755 x86/emulator.c
> 
> diff --git a/x86/emulator.c b/x86/emulator.c
> old mode 100644
> new mode 100755
> index 96576e5..770e8f7
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -11,6 +11,13 @@ int fails, tests;
>  
>  static int exceptions;
>  
> +struct regs {
> +	u64 rax, rbx, rcx, rdx;
> +	u64 rsi, rdi, rsp, rbp;
> +	u64 rip, rflags;
> +};
> +static struct regs inregs, outregs;
> +
>  void report(const char *name, int result)
>  {
>  	++tests;
> @@ -685,6 +692,66 @@ static void test_shld_shrd(u32 *mem)
>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>  }
>  
> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> +			     uint8_t *alt_insn_page, void *insn_ram,
> +			     uint8_t *alt_insn, int alt_insn_length)
> +{
> +	ulong *cr3 = (ulong *)read_cr3();
> +	int i;
> +	static struct regs save;
> +
> +	// Pad with RET instructions
> +	memset(insn_page, 0xc3, 4096);
> +	memset(alt_insn_page, 0xc3, 4096);
> +
> +	// Place a trapping instruction in the page to trigger a VMEXIT
> +	insn_page[0] = 0x89; // mov %eax, (%rax)
> +	insn_page[1] = 0x00;
> +	insn_page[2] = 0x90; // nop
> +	insn_page[3] = 0xc3; // ret
> +
> +	// Place the instruction we want the hypervisor to see in the alternate page
> +	for (i=0; i<alt_insn_length; i++)
> +		alt_insn_page[i] = alt_insn[i];
> +	save = inregs;
> +	
> +	// Load the code TLB with insn_page, but point the page tables at
> +	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> +	// This will make the CPU trap on the insn_page instruction but the
> +	// hypervisor will see alt_insn_page.
> +	install_page(cr3, virt_to_phys(insn_page), insn_ram);
> +	invlpg(insn_ram);
> +	// Load code TLB
> +	asm volatile("call *%0" : : "r"(insn_ram + 3));
> +	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> +	// Trap, let hypervisor emulate at alt_insn_page
> +	asm volatile(
> +		"xchg %%rbx, 8+%[save] \n\t"
> +		"xchg %%rcx, 16+%[save] \n\t"
> +		"xchg %%rdx, 24+%[save] \n\t"
> +		"xchg %%rsi, 32+%[save] \n\t"
> +		"xchg %%rdi, 40+%[save] \n\t"
> +
> +		"call *%1\n\t"
> +
> +		"mov %%rax, 0+%[save] \n\t"
> +		"xchg %%rbx, 8+%[save] \n\t"
> +		"xchg %%rcx, 16+%[save] \n\t"
> +		"xchg %%rdx, 24+%[save] \n\t"
> +		"xchg %%rsi, 32+%[save] \n\t"
> +		"xchg %%rdi, 40+%[save] \n\t"
> +		"mov %%rsp, 48+%[save] \n\t"
> +		"mov %%rbp, 56+%[save] \n\t"
> +		/* Save RFLAGS in outregs*/
> +		"pushf \n\t"
> +		"popq 72+%[save] \n\t"
> +		: [save]"+m"(save)
> +		: "r"(insn_ram), "a"(mem)
> +		: "memory", "cc"
> +		);
> +	outregs = save;
> +}
> +
>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>  {
>      ++exceptions;
> -- 
> 1.7.9.5

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 11:07 ` Gleb Natapov
@ 2013-06-09 12:44   ` 李春奇 <Arthur Chunqi Li>
  2013-06-09 12:49     ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-09 12:44 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, Paolo Bonzini

On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
>> Add a function trap_emulator to run an instruction in emulator.
>> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
>> cannot set in inregs), put instruction codec in alt_insn and call
>> func with alt_insn_length. Get results in outregs.
>>
> Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
>
> %rax because trapping instruction uses it? Use one that does not use
> register at all: MOV r/m32, imm32
I don't know why setting %rax before calling alt_insn_page can cause an
error. When I use "xchg %%rax, 0+%[save]" before "call *%1", %rcx is not
set correctly.
>
> %rsp and %rbp because of ret on instruction page? Use the same trick
> realmode.c test uses: have the code that sets/saves registers in
> insn_page/alt_insn_page itself and copy the instruction you want to test
> into the page itself instead of doing call.
I don't know how the instructions between the calls to insn_page and
alt_insn_page are executed (the install_page function and some other
instructions before "call *%1"). If these insns are executed after
insn_page is called, changes made before the trapping instruction may
affect the execution of these instructions.

>
> Not sure what is so special about %rflags.
>
>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> ---
>>  x86/emulator.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 67 insertions(+)
>>  mode change 100644 => 100755 x86/emulator.c
>>
>> diff --git a/x86/emulator.c b/x86/emulator.c
>> old mode 100644
>> new mode 100755
>> index 96576e5..770e8f7
>> --- a/x86/emulator.c
>> +++ b/x86/emulator.c
>> @@ -11,6 +11,13 @@ int fails, tests;
>>
>>  static int exceptions;
>>
>> +struct regs {
>> +     u64 rax, rbx, rcx, rdx;
>> +     u64 rsi, rdi, rsp, rbp;
>> +     u64 rip, rflags;
>> +};
>> +static struct regs inregs, outregs;
>> +
>>  void report(const char *name, int result)
>>  {
>>       ++tests;
>> @@ -685,6 +692,66 @@ static void test_shld_shrd(u32 *mem)
>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>>  }
>>
>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> +                          uint8_t *alt_insn_page, void *insn_ram,
>> +                          uint8_t *alt_insn, int alt_insn_length)
>> +{
>> +     ulong *cr3 = (ulong *)read_cr3();
>> +     int i;
>> +     static struct regs save;
>> +
>> +     // Pad with RET instructions
>> +     memset(insn_page, 0xc3, 4096);
>> +     memset(alt_insn_page, 0xc3, 4096);
>> +
>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> +     insn_page[1] = 0x00;
>> +     insn_page[2] = 0x90; // nop
>> +     insn_page[3] = 0xc3; // ret
>> +
>> +     // Place the instruction we want the hypervisor to see in the alternate page
>> +     for (i=0; i<alt_insn_length; i++)
>> +             alt_insn_page[i] = alt_insn[i];
>> +     save = inregs;
>> +
>> +     // Load the code TLB with insn_page, but point the page tables at
>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> +     // This will make the CPU trap on the insn_page instruction but the
>> +     // hypervisor will see alt_insn_page.
>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> +     invlpg(insn_ram);
>> +     // Load code TLB
>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> +     // Trap, let hypervisor emulate at alt_insn_page
>> +     asm volatile(
>> +             "xchg %%rbx, 8+%[save] \n\t"
>> +             "xchg %%rcx, 16+%[save] \n\t"
>> +             "xchg %%rdx, 24+%[save] \n\t"
>> +             "xchg %%rsi, 32+%[save] \n\t"
>> +             "xchg %%rdi, 40+%[save] \n\t"
>> +
>> +             "call *%1\n\t"
>> +
>> +             "mov %%rax, 0+%[save] \n\t"
>> +             "xchg %%rbx, 8+%[save] \n\t"
>> +             "xchg %%rcx, 16+%[save] \n\t"
>> +             "xchg %%rdx, 24+%[save] \n\t"
>> +             "xchg %%rsi, 32+%[save] \n\t"
>> +             "xchg %%rdi, 40+%[save] \n\t"
>> +             "mov %%rsp, 48+%[save] \n\t"
>> +             "mov %%rbp, 56+%[save] \n\t"
>> +             /* Save RFLAGS in outregs*/
>> +             "pushf \n\t"
>> +             "popq 72+%[save] \n\t"
>> +             : [save]"+m"(save)
>> +             : "r"(insn_ram), "a"(mem)
>> +             : "memory", "cc"
>> +             );
>> +     outregs = save;
>> +}
>> +
>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>>  {
>>      ++exceptions;
>> --
>> 1.7.9.5
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 12:44   ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-09 12:49     ` Gleb Natapov
  2013-06-09 12:56       ` 李春奇 <Arthur Chunqi Li>
  2013-06-09 13:22       ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 2 replies; 54+ messages in thread
From: Gleb Natapov @ 2013-06-09 12:49 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: kvm, Paolo Bonzini

On Sun, Jun 09, 2013 at 08:44:32PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
> >> Add a function trap_emulator to run an instruction in emulator.
> >> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
> >> cannot set in inregs), put instruction codec in alt_insn and call
> >> func with alt_insn_length. Get results in outregs.
> >>
> > Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
> >
> > %rax because trapping instruction uses it? Use one that does not use
> > register at all: MOV r/m32, imm32
> I don't know why set %rax before call alt_insn_page can cause error. I
> use "xchg %%rax, 0+%[save]" before "call *%1" and the %rcx is not set
> correctly.
We better find this out :)

> >
> > %rsp and %rbp because of ret on instruction page? Use the same trick
> > realmode.c test uses: have the code that sets/saves registers in
> > insn_page/alt_insn_page itself and copy the instruction you want to test
> > into the page itself instead of doing call.
> I don't know how instructions between calling insn_page and
> alt_insn_page are executed (function install_page and some other
> instructions before call *%1". If these insns are executed after
> insn_page is called, changes before the trapping instruction may
> affect the executing of these instructions.
> 
Not sure what you mean here.

> >
> > Not sure what is so special about %rflags.
> >
> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> >> ---
> >>  x86/emulator.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 67 insertions(+)
> >>  mode change 100644 => 100755 x86/emulator.c
> >>
> >> diff --git a/x86/emulator.c b/x86/emulator.c
> >> old mode 100644
> >> new mode 100755
> >> index 96576e5..770e8f7
> >> --- a/x86/emulator.c
> >> +++ b/x86/emulator.c
> >> @@ -11,6 +11,13 @@ int fails, tests;
> >>
> >>  static int exceptions;
> >>
> >> +struct regs {
> >> +     u64 rax, rbx, rcx, rdx;
> >> +     u64 rsi, rdi, rsp, rbp;
> >> +     u64 rip, rflags;
> >> +};
> >> +static struct regs inregs, outregs;
> >> +
> >>  void report(const char *name, int result)
> >>  {
> >>       ++tests;
> >> @@ -685,6 +692,66 @@ static void test_shld_shrd(u32 *mem)
> >>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
> >>  }
> >>
> >> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> >> +                          uint8_t *alt_insn_page, void *insn_ram,
> >> +                          uint8_t *alt_insn, int alt_insn_length)
> >> +{
> >> +     ulong *cr3 = (ulong *)read_cr3();
> >> +     int i;
> >> +     static struct regs save;
> >> +
> >> +     // Pad with RET instructions
> >> +     memset(insn_page, 0xc3, 4096);
> >> +     memset(alt_insn_page, 0xc3, 4096);
> >> +
> >> +     // Place a trapping instruction in the page to trigger a VMEXIT
> >> +     insn_page[0] = 0x89; // mov %eax, (%rax)
> >> +     insn_page[1] = 0x00;
> >> +     insn_page[2] = 0x90; // nop
> >> +     insn_page[3] = 0xc3; // ret
> >> +
> >> +     // Place the instruction we want the hypervisor to see in the alternate page
> >> +     for (i=0; i<alt_insn_length; i++)
> >> +             alt_insn_page[i] = alt_insn[i];
> >> +     save = inregs;
> >> +
> >> +     // Load the code TLB with insn_page, but point the page tables at
> >> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >> +     // This will make the CPU trap on the insn_page instruction but the
> >> +     // hypervisor will see alt_insn_page.
> >> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
> >> +     invlpg(insn_ram);
> >> +     // Load code TLB
> >> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
> >> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> >> +     // Trap, let hypervisor emulate at alt_insn_page
> >> +     asm volatile(
> >> +             "xchg %%rbx, 8+%[save] \n\t"
> >> +             "xchg %%rcx, 16+%[save] \n\t"
> >> +             "xchg %%rdx, 24+%[save] \n\t"
> >> +             "xchg %%rsi, 32+%[save] \n\t"
> >> +             "xchg %%rdi, 40+%[save] \n\t"
> >> +
> >> +             "call *%1\n\t"
> >> +
> >> +             "mov %%rax, 0+%[save] \n\t"
> >> +             "xchg %%rbx, 8+%[save] \n\t"
> >> +             "xchg %%rcx, 16+%[save] \n\t"
> >> +             "xchg %%rdx, 24+%[save] \n\t"
> >> +             "xchg %%rsi, 32+%[save] \n\t"
> >> +             "xchg %%rdi, 40+%[save] \n\t"
> >> +             "mov %%rsp, 48+%[save] \n\t"
> >> +             "mov %%rbp, 56+%[save] \n\t"
> >> +             /* Save RFLAGS in outregs*/
> >> +             "pushf \n\t"
> >> +             "popq 72+%[save] \n\t"
> >> +             : [save]"+m"(save)
> >> +             : "r"(insn_ram), "a"(mem)
> >> +             : "memory", "cc"
> >> +             );
> >> +     outregs = save;
> >> +}
> >> +
> >>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
> >>  {
> >>      ++exceptions;
> >> --
> >> 1.7.9.5
> >
> > --
> >                         Gleb.
> 
> 
> 
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 12:49     ` Gleb Natapov
@ 2013-06-09 12:56       ` 李春奇 <Arthur Chunqi Li>
  2013-06-09 12:58         ` Gleb Natapov
  2013-06-09 13:22       ` 李春奇 <Arthur Chunqi Li>
  1 sibling, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-09 12:56 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, Paolo Bonzini

On Sun, Jun 9, 2013 at 8:49 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Sun, Jun 09, 2013 at 08:44:32PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
>> >> Add a function trap_emulator to run an instruction in emulator.
>> >> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
>> >> cannot set in inregs), put instruction codec in alt_insn and call
>> >> func with alt_insn_length. Get results in outregs.
>> >>
>> > Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
>> >
>> > %rax because trapping instruction uses it? Use one that does not use
>> > register at all: MOV r/m32, imm32
>> I don't know why set %rax before call alt_insn_page can cause error. I
>> use "xchg %%rax, 0+%[save]" before "call *%1" and the %rcx is not set
>> correctly.
> We better find this out :)
I will try to trace it.
>
>> >
>> > %rsp and %rbp because of ret on instruction page? Use the same trick
>> > realmode.c test uses: have the code that sets/saves registers in
>> > insn_page/alt_insn_page itself and copy the instruction you want to test
>> > into the page itself instead of doing call.
>> I don't know how instructions between calling insn_page and
>> alt_insn_page are executed (function install_page and some other
>> instructions before call *%1". If these insns are executed after
>> insn_page is called, changes before the trapping instruction may
>> affect the executing of these instructions.
>>
> Not sure what do you mean here.
Simply put, I mean: what is the execution sequence in that piece of code?
Why will the instruction in alt_insn_page be emulated?

>
>> >
>> > Not sure what is so special about %rflags.
>> >
>> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> >> ---
>> >>  x86/emulator.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> >>  1 file changed, 67 insertions(+)
>> >>  mode change 100644 => 100755 x86/emulator.c
>> >>
>> >> diff --git a/x86/emulator.c b/x86/emulator.c
>> >> old mode 100644
>> >> new mode 100755
>> >> index 96576e5..770e8f7
>> >> --- a/x86/emulator.c
>> >> +++ b/x86/emulator.c
>> >> @@ -11,6 +11,13 @@ int fails, tests;
>> >>
>> >>  static int exceptions;
>> >>
>> >> +struct regs {
>> >> +     u64 rax, rbx, rcx, rdx;
>> >> +     u64 rsi, rdi, rsp, rbp;
>> >> +     u64 rip, rflags;
>> >> +};
>> >> +static struct regs inregs, outregs;
>> >> +
>> >>  void report(const char *name, int result)
>> >>  {
>> >>       ++tests;
>> >> @@ -685,6 +692,66 @@ static void test_shld_shrd(u32 *mem)
>> >>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>> >>  }
>> >>
>> >> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> >> +                          uint8_t *alt_insn_page, void *insn_ram,
>> >> +                          uint8_t *alt_insn, int alt_insn_length)
>> >> +{
>> >> +     ulong *cr3 = (ulong *)read_cr3();
>> >> +     int i;
>> >> +     static struct regs save;
>> >> +
>> >> +     // Pad with RET instructions
>> >> +     memset(insn_page, 0xc3, 4096);
>> >> +     memset(alt_insn_page, 0xc3, 4096);
>> >> +
>> >> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> >> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> >> +     insn_page[1] = 0x00;
>> >> +     insn_page[2] = 0x90; // nop
>> >> +     insn_page[3] = 0xc3; // ret
>> >> +
>> >> +     // Place the instruction we want the hypervisor to see in the alternate page
>> >> +     for (i=0; i<alt_insn_length; i++)
>> >> +             alt_insn_page[i] = alt_insn[i];
>> >> +     save = inregs;
>> >> +
>> >> +     // Load the code TLB with insn_page, but point the page tables at
>> >> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >> +     // This will make the CPU trap on the insn_page instruction but the
>> >> +     // hypervisor will see alt_insn_page.
>> >> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> >> +     invlpg(insn_ram);
>> >> +     // Load code TLB
>> >> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> >> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> >> +     // Trap, let hypervisor emulate at alt_insn_page
>> >> +     asm volatile(
>> >> +             "xchg %%rbx, 8+%[save] \n\t"
>> >> +             "xchg %%rcx, 16+%[save] \n\t"
>> >> +             "xchg %%rdx, 24+%[save] \n\t"
>> >> +             "xchg %%rsi, 32+%[save] \n\t"
>> >> +             "xchg %%rdi, 40+%[save] \n\t"
>> >> +
>> >> +             "call *%1\n\t"
>> >> +
>> >> +             "mov %%rax, 0+%[save] \n\t"
>> >> +             "xchg %%rbx, 8+%[save] \n\t"
>> >> +             "xchg %%rcx, 16+%[save] \n\t"
>> >> +             "xchg %%rdx, 24+%[save] \n\t"
>> >> +             "xchg %%rsi, 32+%[save] \n\t"
>> >> +             "xchg %%rdi, 40+%[save] \n\t"
>> >> +             "mov %%rsp, 48+%[save] \n\t"
>> >> +             "mov %%rbp, 56+%[save] \n\t"
>> >> +             /* Save RFLAGS in outregs*/
>> >> +             "pushf \n\t"
>> >> +             "popq 72+%[save] \n\t"
>> >> +             : [save]"+m"(save)
>> >> +             : "r"(insn_ram), "a"(mem)
>> >> +             : "memory", "cc"
>> >> +             );
>> >> +     outregs = save;
>> >> +}
>> >> +
>> >>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>> >>  {
>> >>      ++exceptions;
>> >> --
>> >> 1.7.9.5
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 12:56       ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-09 12:58         ` Gleb Natapov
  0 siblings, 0 replies; 54+ messages in thread
From: Gleb Natapov @ 2013-06-09 12:58 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: kvm, Paolo Bonzini

On Sun, Jun 09, 2013 at 08:56:04PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> On Sun, Jun 9, 2013 at 8:49 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Sun, Jun 09, 2013 at 08:44:32PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
> >> >> Add a function trap_emulator to run an instruction in emulator.
> >> >> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
> >> >> cannot set in inregs), put instruction codec in alt_insn and call
> >> >> func with alt_insn_length. Get results in outregs.
> >> >>
> >> > Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
> >> >
> >> > %rax because trapping instruction uses it? Use one that does not use
> >> > register at all: MOV r/m32, imm32
> >> I don't know why set %rax before call alt_insn_page can cause error. I
> >> use "xchg %%rax, 0+%[save]" before "call *%1" and the %rcx is not set
> >> correctly.
> > We better find this out :)
> I will try to trace it.
> >
> >> >
> >> > %rsp and %rbp because of ret on instruction page? Use the same trick
> >> > realmode.c test uses: have the code that sets/saves registers in
> >> > insn_page/alt_insn_page itself and copy the instruction you want to test
> >> > into the page itself instead of doing call.
> >> I don't know how instructions between calling insn_page and
> >> alt_insn_page are executed (function install_page and some other
> >> instructions before call *%1". If these insns are executed after
> >> insn_page is called, changes before the trapping instruction may
> >> affect the executing of these instructions.
> >>
> > Not sure what do you mean here.
> Simply, I mean what is the executing sequence in that piece of codes.
> Why instruction in alt_insn_page will be emulated?
> 
For the same reason it is emulated now. The trick is to have the trapping
instruction and the instruction we want to emulate at the same offset on
insn_page and alt_insn_page. Right now the offset is 0, but it does not
have to be.

> >> >
> >> > Not sure what is so special about %rflags.
> >> >
> >> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> >> >> ---
> >> >>  x86/emulator.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >> >>  1 file changed, 67 insertions(+)
> >> >>  mode change 100644 => 100755 x86/emulator.c
> >> >>
> >> >> diff --git a/x86/emulator.c b/x86/emulator.c
> >> >> old mode 100644
> >> >> new mode 100755
> >> >> index 96576e5..770e8f7
> >> >> --- a/x86/emulator.c
> >> >> +++ b/x86/emulator.c
> >> >> @@ -11,6 +11,13 @@ int fails, tests;
> >> >>
> >> >>  static int exceptions;
> >> >>
> >> >> +struct regs {
> >> >> +     u64 rax, rbx, rcx, rdx;
> >> >> +     u64 rsi, rdi, rsp, rbp;
> >> >> +     u64 rip, rflags;
> >> >> +};
> >> >> +static struct regs inregs, outregs;
> >> >> +
> >> >>  void report(const char *name, int result)
> >> >>  {
> >> >>       ++tests;
> >> >> @@ -685,6 +692,66 @@ static void test_shld_shrd(u32 *mem)
> >> >>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
> >> >>  }
> >> >>
> >> >> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> >> >> +                          uint8_t *alt_insn_page, void *insn_ram,
> >> >> +                          uint8_t *alt_insn, int alt_insn_length)
> >> >> +{
> >> >> +     ulong *cr3 = (ulong *)read_cr3();
> >> >> +     int i;
> >> >> +     static struct regs save;
> >> >> +
> >> >> +     // Pad with RET instructions
> >> >> +     memset(insn_page, 0xc3, 4096);
> >> >> +     memset(alt_insn_page, 0xc3, 4096);
> >> >> +
> >> >> +     // Place a trapping instruction in the page to trigger a VMEXIT
> >> >> +     insn_page[0] = 0x89; // mov %eax, (%rax)
> >> >> +     insn_page[1] = 0x00;
> >> >> +     insn_page[2] = 0x90; // nop
> >> >> +     insn_page[3] = 0xc3; // ret
> >> >> +
> >> >> +     // Place the instruction we want the hypervisor to see in the alternate page
> >> >> +     for (i=0; i<alt_insn_length; i++)
> >> >> +             alt_insn_page[i] = alt_insn[i];
> >> >> +     save = inregs;
> >> >> +
> >> >> +     // Load the code TLB with insn_page, but point the page tables at
> >> >> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >> >> +     // This will make the CPU trap on the insn_page instruction but the
> >> >> +     // hypervisor will see alt_insn_page.
> >> >> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
> >> >> +     invlpg(insn_ram);
> >> >> +     // Load code TLB
> >> >> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
> >> >> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> >> >> +     // Trap, let hypervisor emulate at alt_insn_page
> >> >> +     asm volatile(
> >> >> +             "xchg %%rbx, 8+%[save] \n\t"
> >> >> +             "xchg %%rcx, 16+%[save] \n\t"
> >> >> +             "xchg %%rdx, 24+%[save] \n\t"
> >> >> +             "xchg %%rsi, 32+%[save] \n\t"
> >> >> +             "xchg %%rdi, 40+%[save] \n\t"
> >> >> +
> >> >> +             "call *%1\n\t"
> >> >> +
> >> >> +             "mov %%rax, 0+%[save] \n\t"
> >> >> +             "xchg %%rbx, 8+%[save] \n\t"
> >> >> +             "xchg %%rcx, 16+%[save] \n\t"
> >> >> +             "xchg %%rdx, 24+%[save] \n\t"
> >> >> +             "xchg %%rsi, 32+%[save] \n\t"
> >> >> +             "xchg %%rdi, 40+%[save] \n\t"
> >> >> +             "mov %%rsp, 48+%[save] \n\t"
> >> >> +             "mov %%rbp, 56+%[save] \n\t"
> >> >> +             /* Save RFLAGS in outregs*/
> >> >> +             "pushf \n\t"
> >> >> +             "popq 72+%[save] \n\t"
> >> >> +             : [save]"+m"(save)
> >> >> +             : "r"(insn_ram), "a"(mem)
> >> >> +             : "memory", "cc"
> >> >> +             );
> >> >> +     outregs = save;
> >> >> +}
> >> >> +
> >> >>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
> >> >>  {
> >> >>      ++exceptions;
> >> >> --
> >> >> 1.7.9.5
> >> >
> >> > --
> >> >                         Gleb.
> >>
> >>
> >>
> >> --
> >> Arthur Chunqi Li
> >> Department of Computer Science
> >> School of EECS
> >> Peking University
> >> Beijing, China
> >
> > --
> >                         Gleb.
> 
> 
> 
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 12:49     ` Gleb Natapov
  2013-06-09 12:56       ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-09 13:22       ` 李春奇 <Arthur Chunqi Li>
  2013-06-09 14:09         ` Gleb Natapov
  1 sibling, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-09 13:22 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, Paolo Bonzini

On Sun, Jun 9, 2013 at 8:49 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Sun, Jun 09, 2013 at 08:44:32PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
>> >> Add a function trap_emulator to run an instruction in emulator.
>> >> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
>> >> cannot set in inregs), put instruction codec in alt_insn and call
>> >> func with alt_insn_length. Get results in outregs.
>> >>
>> > Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
>> >
>> > %rax because trapping instruction uses it? Use one that does not use
>> > register at all: MOV r/m32, imm32
>> I don't know why set %rax before call alt_insn_page can cause error. I
>> use "xchg %%rax, 0+%[save]" before "call *%1" and the %rcx is not set
>> correctly.
> We better find this out :)
I found that, before calling alt_insn_page, the address of "mem" is loaded
into %rax. Why?
>
>> >
>> > %rsp and %rbp because of ret on instruction page? Use the same trick
>> > realmode.c test uses: have the code that sets/saves registers in
>> > insn_page/alt_insn_page itself and copy the instruction you want to test
>> > into the page itself instead of doing call.
>> I don't know how instructions between calling insn_page and
>> alt_insn_page are executed (function install_page and some other
>> instructions before call *%1". If these insns are executed after
>> insn_page is called, changes before the trapping instruction may
>> affect the executing of these instructions.
>>
> Not sure what do you mean here.
>
>> >
>> > Not sure what is so special about %rflags.
>> >
>> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> >> ---
>> >>  x86/emulator.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> >>  1 file changed, 67 insertions(+)
>> >>  mode change 100644 => 100755 x86/emulator.c
>> >>
>> >> diff --git a/x86/emulator.c b/x86/emulator.c
>> >> old mode 100644
>> >> new mode 100755
>> >> index 96576e5..770e8f7
>> >> --- a/x86/emulator.c
>> >> +++ b/x86/emulator.c
>> >> @@ -11,6 +11,13 @@ int fails, tests;
>> >>
>> >>  static int exceptions;
>> >>
>> >> +struct regs {
>> >> +     u64 rax, rbx, rcx, rdx;
>> >> +     u64 rsi, rdi, rsp, rbp;
>> >> +     u64 rip, rflags;
>> >> +};
>> >> +static struct regs inregs, outregs;
>> >> +
>> >>  void report(const char *name, int result)
>> >>  {
>> >>       ++tests;
>> >> @@ -685,6 +692,66 @@ static void test_shld_shrd(u32 *mem)
>> >>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>> >>  }
>> >>
>> >> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> >> +                          uint8_t *alt_insn_page, void *insn_ram,
>> >> +                          uint8_t *alt_insn, int alt_insn_length)
>> >> +{
>> >> +     ulong *cr3 = (ulong *)read_cr3();
>> >> +     int i;
>> >> +     static struct regs save;
>> >> +
>> >> +     // Pad with RET instructions
>> >> +     memset(insn_page, 0xc3, 4096);
>> >> +     memset(alt_insn_page, 0xc3, 4096);
>> >> +
>> >> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> >> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> >> +     insn_page[1] = 0x00;
>> >> +     insn_page[2] = 0x90; // nop
>> >> +     insn_page[3] = 0xc3; // ret
>> >> +
>> >> +     // Place the instruction we want the hypervisor to see in the alternate page
>> >> +     for (i=0; i<alt_insn_length; i++)
>> >> +             alt_insn_page[i] = alt_insn[i];
>> >> +     save = inregs;
>> >> +
>> >> +     // Load the code TLB with insn_page, but point the page tables at
>> >> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >> +     // This will make the CPU trap on the insn_page instruction but the
>> >> +     // hypervisor will see alt_insn_page.
>> >> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> >> +     invlpg(insn_ram);
>> >> +     // Load code TLB
>> >> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> >> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> >> +     // Trap, let hypervisor emulate at alt_insn_page
>> >> +     asm volatile(
>> >> +             "xchg %%rbx, 8+%[save] \n\t"
>> >> +             "xchg %%rcx, 16+%[save] \n\t"
>> >> +             "xchg %%rdx, 24+%[save] \n\t"
>> >> +             "xchg %%rsi, 32+%[save] \n\t"
>> >> +             "xchg %%rdi, 40+%[save] \n\t"
>> >> +
>> >> +             "call *%1\n\t"
>> >> +
>> >> +             "mov %%rax, 0+%[save] \n\t"
>> >> +             "xchg %%rbx, 8+%[save] \n\t"
>> >> +             "xchg %%rcx, 16+%[save] \n\t"
>> >> +             "xchg %%rdx, 24+%[save] \n\t"
>> >> +             "xchg %%rsi, 32+%[save] \n\t"
>> >> +             "xchg %%rdi, 40+%[save] \n\t"
>> >> +             "mov %%rsp, 48+%[save] \n\t"
>> >> +             "mov %%rbp, 56+%[save] \n\t"
>> >> +             /* Save RFLAGS in outregs*/
>> >> +             "pushf \n\t"
>> >> +             "popq 72+%[save] \n\t"
>> >> +             : [save]"+m"(save)
>> >> +             : "r"(insn_ram), "a"(mem)
>> >> +             : "memory", "cc"
>> >> +             );
>> >> +     outregs = save;
>> >> +}
>> >> +
>> >>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>> >>  {
>> >>      ++exceptions;
>> >> --
>> >> 1.7.9.5
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 13:22       ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-09 14:09         ` Gleb Natapov
  2013-06-09 15:23           ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-09 14:09 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: kvm, Paolo Bonzini

On Sun, Jun 09, 2013 at 09:22:27PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> On Sun, Jun 9, 2013 at 8:49 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Sun, Jun 09, 2013 at 08:44:32PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
> >> >> Add a function trap_emulator to run an instruction in emulator.
> >> >> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
> >> >> cannot set in inregs), put instruction codec in alt_insn and call
> >> >> func with alt_insn_length. Get results in outregs.
> >> >>
> >> > Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
> >> >
> >> > %rax because trapping instruction uses it? Use one that does not use
> >> > register at all: MOV r/m32, imm32
> >> I don't know why set %rax before call alt_insn_page can cause error. I
> >> use "xchg %%rax, 0+%[save]" before "call *%1" and the %rcx is not set
> >> correctly.
> > We better find this out :)
> I found that before calling alt_insn_page, address of "mem" is saved
> to %rax, why?
Because the instruction that we use to trigger the vmexit is mov %eax, (%rax),
so the MMIO address mem is loaded into %rax before jumping into it.

> >
> >> >
> >> > %rsp and %rbp because of ret on instruction page? Use the same trick
> >> > realmode.c test uses: have the code that sets/saves registers in
> >> > insn_page/alt_insn_page itself and copy the instruction you want to test
> >> > into the page itself instead of doing call.
> >> I don't know how instructions between calling insn_page and
> >> alt_insn_page are executed (function install_page and some other
> >> instructions before call *%1". If these insns are executed after
> >> insn_page is called, changes before the trapping instruction may
> >> affect the executing of these instructions.
> >>
> > Not sure what do you mean here.
> >
> >> >
> >> > Not sure what is so special about %rflags.
> >> >
> >> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> >> >> ---
> >> >>  x86/emulator.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >> >>  1 file changed, 67 insertions(+)
> >> >>  mode change 100644 => 100755 x86/emulator.c
> >> >>
> >> >> diff --git a/x86/emulator.c b/x86/emulator.c
> >> >> old mode 100644
> >> >> new mode 100755
> >> >> index 96576e5..770e8f7
> >> >> --- a/x86/emulator.c
> >> >> +++ b/x86/emulator.c
> >> >> @@ -11,6 +11,13 @@ int fails, tests;
> >> >>
> >> >>  static int exceptions;
> >> >>
> >> >> +struct regs {
> >> >> +     u64 rax, rbx, rcx, rdx;
> >> >> +     u64 rsi, rdi, rsp, rbp;
> >> >> +     u64 rip, rflags;
> >> >> +};
> >> >> +static struct regs inregs, outregs;
> >> >> +
> >> >>  void report(const char *name, int result)
> >> >>  {
> >> >>       ++tests;
> >> >> @@ -685,6 +692,66 @@ static void test_shld_shrd(u32 *mem)
> >> >>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
> >> >>  }
> >> >>
> >> >> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> >> >> +                          uint8_t *alt_insn_page, void *insn_ram,
> >> >> +                          uint8_t *alt_insn, int alt_insn_length)
> >> >> +{
> >> >> +     ulong *cr3 = (ulong *)read_cr3();
> >> >> +     int i;
> >> >> +     static struct regs save;
> >> >> +
> >> >> +     // Pad with RET instructions
> >> >> +     memset(insn_page, 0xc3, 4096);
> >> >> +     memset(alt_insn_page, 0xc3, 4096);
> >> >> +
> >> >> +     // Place a trapping instruction in the page to trigger a VMEXIT
> >> >> +     insn_page[0] = 0x89; // mov %eax, (%rax)
> >> >> +     insn_page[1] = 0x00;
> >> >> +     insn_page[2] = 0x90; // nop
> >> >> +     insn_page[3] = 0xc3; // ret
> >> >> +
> >> >> +     // Place the instruction we want the hypervisor to see in the alternate page
> >> >> +     for (i=0; i<alt_insn_length; i++)
> >> >> +             alt_insn_page[i] = alt_insn[i];
> >> >> +     save = inregs;
> >> >> +
> >> >> +     // Load the code TLB with insn_page, but point the page tables at
> >> >> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >> >> +     // This will make the CPU trap on the insn_page instruction but the
> >> >> +     // hypervisor will see alt_insn_page.
> >> >> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
> >> >> +     invlpg(insn_ram);
> >> >> +     // Load code TLB
> >> >> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
> >> >> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> >> >> +     // Trap, let hypervisor emulate at alt_insn_page
> >> >> +     asm volatile(
> >> >> +             "xchg %%rbx, 8+%[save] \n\t"
> >> >> +             "xchg %%rcx, 16+%[save] \n\t"
> >> >> +             "xchg %%rdx, 24+%[save] \n\t"
> >> >> +             "xchg %%rsi, 32+%[save] \n\t"
> >> >> +             "xchg %%rdi, 40+%[save] \n\t"
> >> >> +
> >> >> +             "call *%1\n\t"
> >> >> +
> >> >> +             "mov %%rax, 0+%[save] \n\t"
> >> >> +             "xchg %%rbx, 8+%[save] \n\t"
> >> >> +             "xchg %%rcx, 16+%[save] \n\t"
> >> >> +             "xchg %%rdx, 24+%[save] \n\t"
> >> >> +             "xchg %%rsi, 32+%[save] \n\t"
> >> >> +             "xchg %%rdi, 40+%[save] \n\t"
> >> >> +             "mov %%rsp, 48+%[save] \n\t"
> >> >> +             "mov %%rbp, 56+%[save] \n\t"
> >> >> +             /* Save RFLAGS in outregs*/
> >> >> +             "pushf \n\t"
> >> >> +             "popq 72+%[save] \n\t"
> >> >> +             : [save]"+m"(save)
> >> >> +             : "r"(insn_ram), "a"(mem)
> >> >> +             : "memory", "cc"
> >> >> +             );
> >> >> +     outregs = save;
> >> >> +}
> >> >> +
> >> >>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
> >> >>  {
> >> >>      ++exceptions;
> >> >> --
> >> >> 1.7.9.5
> >> >
> >> > --
> >> >                         Gleb.
> >>
> >>
> >>
> >> --
> >> Arthur Chunqi Li
> >> Department of Computer Science
> >> School of EECS
> >> Peking University
> >> Beijing, China
> >
> > --
> >                         Gleb.
> 
> 
> 
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 14:09         ` Gleb Natapov
@ 2013-06-09 15:23           ` 李春奇 <Arthur Chunqi Li>
  2013-06-09 16:00             ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-09 15:23 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, Paolo Bonzini

On Sun, Jun 9, 2013 at 10:09 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Sun, Jun 09, 2013 at 09:22:27PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> On Sun, Jun 9, 2013 at 8:49 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Sun, Jun 09, 2013 at 08:44:32PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
>> >> >> Add a function trap_emulator to run an instruction in emulator.
>> >> >> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
>> >> >> cannot set in inregs), put instruction codec in alt_insn and call
>> >> >> func with alt_insn_length. Get results in outregs.
>> >> >>
>> >> > Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
>> >> >
>> >> > %rax because trapping instruction uses it? Use one that does not use
>> >> > register at all: MOV r/m32, imm32
>> >> I don't know why set %rax before call alt_insn_page can cause error. I
>> >> use "xchg %%rax, 0+%[save]" before "call *%1" and the %rcx is not set
>> >> correctly.
>> > We better find this out :)
>> I found that before calling alt_insn_page, address of "mem" is saved
>> to %rax, why?
> Because instruction that we use to trigger vmexit is mov %eax, (%rax) so
> MMIO address mem is loaded into %rax before jumping into it.
I think this is why changing %rax causes an error. If we use mov
%eax, (%rax) to trigger the vmexit, and %rax is changed before calling
alt_insn_page, the code in alt_insn_page will not be executed and the
call will return directly.
I changed the code that triggers the vmexit to "mov %eax, (%r8)" and
loaded "mem" into %r8 before calling alt_insn_page, and it runs OK.

Besides, I also don't know whether a changed %rflags may cause some
unpredictable behavior, so for now we just assume it causes no errors :)
>
>> >
>> >> >
>> >> > %rsp and %rbp because of ret on instruction page? Use the same trick
>> >> > realmode.c test uses: have the code that sets/saves registers in
>> >> > insn_page/alt_insn_page itself and copy the instruction you want to test
>> >> > into the page itself instead of doing call.
>> >> I don't know how instructions between calling insn_page and
>> >> alt_insn_page are executed (function install_page and some other
>> >> instructions before call *%1". If these insns are executed after
>> >> insn_page is called, changes before the trapping instruction may
>> >> affect the executing of these instructions.
>> >>
>> > Not sure what do you mean here.
>> >
>> >> >
>> >> > Not sure what is so special about %rflags.
>> >> >
>> >> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> >> >> ---
>> >> >>  x86/emulator.c |   67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> >> >>  1 file changed, 67 insertions(+)
>> >> >>  mode change 100644 => 100755 x86/emulator.c
>> >> >>
>> >> >> diff --git a/x86/emulator.c b/x86/emulator.c
>> >> >> old mode 100644
>> >> >> new mode 100755
>> >> >> index 96576e5..770e8f7
>> >> >> --- a/x86/emulator.c
>> >> >> +++ b/x86/emulator.c
>> >> >> @@ -11,6 +11,13 @@ int fails, tests;
>> >> >>
>> >> >>  static int exceptions;
>> >> >>
>> >> >> +struct regs {
>> >> >> +     u64 rax, rbx, rcx, rdx;
>> >> >> +     u64 rsi, rdi, rsp, rbp;
>> >> >> +     u64 rip, rflags;
>> >> >> +};
>> >> >> +static struct regs inregs, outregs;
>> >> >> +
>> >> >>  void report(const char *name, int result)
>> >> >>  {
>> >> >>       ++tests;
>> >> >> @@ -685,6 +692,66 @@ static void test_shld_shrd(u32 *mem)
>> >> >>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>> >> >>  }
>> >> >>
>> >> >> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> >> >> +                          uint8_t *alt_insn_page, void *insn_ram,
>> >> >> +                          uint8_t *alt_insn, int alt_insn_length)
>> >> >> +{
>> >> >> +     ulong *cr3 = (ulong *)read_cr3();
>> >> >> +     int i;
>> >> >> +     static struct regs save;
>> >> >> +
>> >> >> +     // Pad with RET instructions
>> >> >> +     memset(insn_page, 0xc3, 4096);
>> >> >> +     memset(alt_insn_page, 0xc3, 4096);
>> >> >> +
>> >> >> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> >> >> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> >> >> +     insn_page[1] = 0x00;
>> >> >> +     insn_page[2] = 0x90; // nop
>> >> >> +     insn_page[3] = 0xc3; // ret
>> >> >> +
>> >> >> +     // Place the instruction we want the hypervisor to see in the alternate page
>> >> >> +     for (i=0; i<alt_insn_length; i++)
>> >> >> +             alt_insn_page[i] = alt_insn[i];
>> >> >> +     save = inregs;
>> >> >> +
>> >> >> +     // Load the code TLB with insn_page, but point the page tables at
>> >> >> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >> >> +     // This will make the CPU trap on the insn_page instruction but the
>> >> >> +     // hypervisor will see alt_insn_page.
>> >> >> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> >> >> +     invlpg(insn_ram);
>> >> >> +     // Load code TLB
>> >> >> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> >> >> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> >> >> +     // Trap, let hypervisor emulate at alt_insn_page
>> >> >> +     asm volatile(
>> >> >> +             "xchg %%rbx, 8+%[save] \n\t"
>> >> >> +             "xchg %%rcx, 16+%[save] \n\t"
>> >> >> +             "xchg %%rdx, 24+%[save] \n\t"
>> >> >> +             "xchg %%rsi, 32+%[save] \n\t"
>> >> >> +             "xchg %%rdi, 40+%[save] \n\t"
>> >> >> +
>> >> >> +             "call *%1\n\t"
>> >> >> +
>> >> >> +             "mov %%rax, 0+%[save] \n\t"
>> >> >> +             "xchg %%rbx, 8+%[save] \n\t"
>> >> >> +             "xchg %%rcx, 16+%[save] \n\t"
>> >> >> +             "xchg %%rdx, 24+%[save] \n\t"
>> >> >> +             "xchg %%rsi, 32+%[save] \n\t"
>> >> >> +             "xchg %%rdi, 40+%[save] \n\t"
>> >> >> +             "mov %%rsp, 48+%[save] \n\t"
>> >> >> +             "mov %%rbp, 56+%[save] \n\t"
>> >> >> +             /* Save RFLAGS in outregs*/
>> >> >> +             "pushf \n\t"
>> >> >> +             "popq 72+%[save] \n\t"
>> >> >> +             : [save]"+m"(save)
>> >> >> +             : "r"(insn_ram), "a"(mem)
>> >> >> +             : "memory", "cc"
>> >> >> +             );
>> >> >> +     outregs = save;
>> >> >> +}
>> >> >> +
>> >> >>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>> >> >>  {
>> >> >>      ++exceptions;
>> >> >> --
>> >> >> 1.7.9.5
>> >> >
>> >> > --
>> >> >                         Gleb.
>> >>
>> >>
>> >>
>> >> --
>> >> Arthur Chunqi Li
>> >> Department of Computer Science
>> >> School of EECS
>> >> Peking University
>> >> Beijing, China
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 15:23           ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-09 16:00             ` Gleb Natapov
  2013-06-09 17:09               ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-09 16:00 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: kvm, Paolo Bonzini

On Sun, Jun 09, 2013 at 11:23:26PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> On Sun, Jun 9, 2013 at 10:09 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Sun, Jun 09, 2013 at 09:22:27PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> On Sun, Jun 9, 2013 at 8:49 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Sun, Jun 09, 2013 at 08:44:32PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
> >> >> >> Add a function trap_emulator to run an instruction in emulator.
> >> >> >> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
> >> >> >> cannot set in inregs), put instruction codec in alt_insn and call
> >> >> >> func with alt_insn_length. Get results in outregs.
> >> >> >>
> >> >> > Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
> >> >> >
> >> >> > %rax because trapping instruction uses it? Use one that does not use
> >> >> > register at all: MOV r/m32, imm32
> >> >> I don't know why set %rax before call alt_insn_page can cause error. I
> >> >> use "xchg %%rax, 0+%[save]" before "call *%1" and the %rcx is not set
> >> >> correctly.
> >> > We better find this out :)
> >> I found that before calling alt_insn_page, address of "mem" is saved
> >> to %rax, why?
> > Because instruction that we use to trigger vmexit is mov %eax, (%rax) so
> > MMOI address mem is loaded into %rax before jumping into it.
> I think this is why changing %rax will cause error. If we use mov
> %eax, (%rax) to trigger vmexit, and %rax is changed before calling
> alt_insn_page, codes in alt_insn_page will not be executed and return
> directly.
> I changed the codes which trigger vmexit to "mov %eax, (%r8)" and set
> "mem" to %r8 before calling alt_insn_page, it runs OK.
> 
Just use an instruction that does not use registers at all. mov $1, addr
where addr is immediate and encoded from mem parameter.

> Besides, I also don't know if changed %rflags may cause some
> unpredictable actions, so now we just treat it with no error :)
If test sets rflags to a value that causes crashes this is a test bug,
no need to prevent this from happening.

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 16:00             ` Gleb Natapov
@ 2013-06-09 17:09               ` 李春奇 <Arthur Chunqi Li>
  2013-06-09 17:13                 ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-09 17:09 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, Paolo Bonzini

I have finished the infrastructure, but after changing the test_mmx_movq_mf
test case it returns an error:
unhandled excecption 6
Return value from qemu: 15

If I don't change %rsp and %rbp, it runs OK.
So I wonder whether this test case strictly relies on %rsp and %rbp?

On Mon, Jun 10, 2013 at 12:00 AM, Gleb Natapov <gleb@redhat.com> wrote:
> On Sun, Jun 09, 2013 at 11:23:26PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> On Sun, Jun 9, 2013 at 10:09 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Sun, Jun 09, 2013 at 09:22:27PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> On Sun, Jun 9, 2013 at 8:49 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Sun, Jun 09, 2013 at 08:44:32PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
>> >> >> >> Add a function trap_emulator to run an instruction in emulator.
>> >> >> >> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
>> >> >> >> cannot set in inregs), put instruction codec in alt_insn and call
>> >> >> >> func with alt_insn_length. Get results in outregs.
>> >> >> >>
>> >> >> > Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
>> >> >> >
>> >> >> > %rax because trapping instruction uses it? Use one that does not use
>> >> >> > register at all: MOV r/m32, imm32
>> >> >> I don't know why set %rax before call alt_insn_page can cause error. I
>> >> >> use "xchg %%rax, 0+%[save]" before "call *%1" and the %rcx is not set
>> >> >> correctly.
>> >> > We better find this out :)
>> >> I found that before calling alt_insn_page, address of "mem" is saved
>> >> to %rax, why?
>> > Because instruction that we use to trigger vmexit is mov %eax, (%rax) so
>> > MMOI address mem is loaded into %rax before jumping into it.
>> I think this is why changing %rax will cause error. If we use mov
>> %eax, (%rax) to trigger vmexit, and %rax is changed before calling
>> alt_insn_page, codes in alt_insn_page will not be executed and return
>> directly.
>> I changed the codes which trigger vmexit to "mov %eax, (%r8)" and set
>> "mem" to %r8 before calling alt_insn_page, it runs OK.
>>
> Just use an instruction that does not use registers at all. mov $1, addr
> where addr is immediate and encoded from mem parameter.
>
>> Besides, I also don't know if changed %rflags may cause some
>> unpredictable actions, so now we just treat it with no error :)
> If test sets rflags to a value that causes crashes this is a test bug,
> no need to prevent this from happening.
>
> --
>                         Gleb.



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 17:09               ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-09 17:13                 ` Gleb Natapov
  2013-06-09 17:28                   ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-09 17:13 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: kvm, Paolo Bonzini

On Mon, Jun 10, 2013 at 01:09:15AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> I have finished the infrastructure but after changing test_mmx_movq_mf
> test case, it return error:
> unhandled excecption 6
> Return value from qemu: 15
> 
> If I don't change %rsp and %rbp, it runs OK.
> So I wonder if this test case is strictly reply on %rsp and %rbp?
> 
I can't help without seeing the code.

> On Mon, Jun 10, 2013 at 12:00 AM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Sun, Jun 09, 2013 at 11:23:26PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> On Sun, Jun 9, 2013 at 10:09 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Sun, Jun 09, 2013 at 09:22:27PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> On Sun, Jun 9, 2013 at 8:49 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > On Sun, Jun 09, 2013 at 08:44:32PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> >> > On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
> >> >> >> >> Add a function trap_emulator to run an instruction in emulator.
> >> >> >> >> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
> >> >> >> >> cannot set in inregs), put instruction codec in alt_insn and call
> >> >> >> >> func with alt_insn_length. Get results in outregs.
> >> >> >> >>
> >> >> >> > Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
> >> >> >> >
> >> >> >> > %rax because trapping instruction uses it? Use one that does not use
> >> >> >> > register at all: MOV r/m32, imm32
> >> >> >> I don't know why set %rax before call alt_insn_page can cause error. I
> >> >> >> use "xchg %%rax, 0+%[save]" before "call *%1" and the %rcx is not set
> >> >> >> correctly.
> >> >> > We better find this out :)
> >> >> I found that before calling alt_insn_page, address of "mem" is saved
> >> >> to %rax, why?
> >> > Because instruction that we use to trigger vmexit is mov %eax, (%rax) so
> >> > MMOI address mem is loaded into %rax before jumping into it.
> >> I think this is why changing %rax will cause error. If we use mov
> >> %eax, (%rax) to trigger vmexit, and %rax is changed before calling
> >> alt_insn_page, codes in alt_insn_page will not be executed and return
> >> directly.
> >> I changed the codes which trigger vmexit to "mov %eax, (%r8)" and set
> >> "mem" to %r8 before calling alt_insn_page, it runs OK.
> >>
> > Just use an instruction that does not use registers at all. mov $1, addr
> > where addr is immediate and encoded from mem parameter.
> >
> >> Besides, I also don't know if changed %rflags may cause some
> >> unpredictable actions, so now we just treat it with no error :)
> > If test sets rflags to a value that causes crashes this is a test bug,
> > no need to prevent this from happening.
> >
> > --
> >                         Gleb.
> 
> 
> 
> -- 
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 17:13                 ` Gleb Natapov
@ 2013-06-09 17:28                   ` 李春奇 <Arthur Chunqi Li>
  2013-06-09 17:39                     ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-09 17:28 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, Paolo Bonzini

I had trouble passing the address of inregs into alt_insn_page, so I
use r9 and r10 to carry %rsp and %rbp into it, and r8 is used to
trigger the vmexit.

I paste the relevant functions as follows:


/*
 * Run one instruction through the hypervisor's instruction emulator.
 *
 * mem             - address loaded into %r8 and used as the store target of
 *                   the trapping instruction placed in insn_page.
 * insn_page       - page holding the code the CPU actually executes.
 * alt_insn_page   - page holding the instruction the hypervisor should see.
 * insn_ram        - virtual address that is mapped first to insn_page and
 *                   then to alt_insn_page.
 * alt_insn        - bytes of the instruction under test.
 * alt_insn_length - number of bytes in alt_insn.
 *
 * Register values are taken from the global inregs and the values observed
 * after the call are stored in the global outregs.  The asm below addresses
 * the copy by byte offset: 0 rax, 8 rbx, 16 rcx, 24 rdx, 32 rsi, 40 rdi,
 * 48 rsp, 56 rbp, 72 rflags.
 */
static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
    uint8_t *alt_insn_page, void *insn_ram,
    uint8_t *alt_insn, int alt_insn_length)
{
    ulong *cr3 = (ulong *)read_cr3();
    int i;
    // static, so the asm below can address it through a plain "m" operand
    // even while %rsp/%rbp hold test values
    static struct regs save;

    // Fill both pages with 0x90 (NOP) bytes
    memset(insn_page, 0x90, 4096);
    memset(alt_insn_page, 0x90, 4096);

    // Prologue executed by the CPU: swap in the test %rsp/%rbp (carried in
    // %r9/%r10), then the trapping instruction that triggers a VMEXIT
    insn_page[0] = 0x49; // xchg   %rsp, %r9
    insn_page[1] = 0x87;
    insn_page[2] = 0xe1;
    insn_page[3] = 0x49; // xchg   %rbp, %r10
    insn_page[4] = 0x87;
    insn_page[5] = 0xea;
    insn_page[6] = 0x41; // mov %eax, (%r8)
    insn_page[7] = 0x89;
    insn_page[8] = 0x00; // ModRM byte of the mov above (not a ret)

    // Place the instruction we want the hypervisor to see in the alternate
    // page, at offset 6 so it lines up with the trapping mov in insn_page
    for (i=0; i<alt_insn_length; i++)
        alt_insn_page[i+6] = alt_insn[i];
    i+=6;
    // Epilogue, written to insn_page starting at offset alt_insn_length + 6:
    // swap the real %rsp/%rbp back and return to the caller.
    // NOTE(review): the trapping mov is 3 bytes, so this only follows it
    // directly when alt_insn_length == 3 -- confirm for other lengths.
    insn_page[i++] = 0x49; //xchg   %rsp, %r9
    insn_page[i++] = 0x87;
    insn_page[i++] = 0xe1;
    insn_page[i++] = 0x49; //xchg   %rbp, %r10
    insn_page[i++] = 0x87;
    insn_page[i++] = 0xea;
    insn_page[i++] = 0xc3; // ret
    // Work on a private copy so inregs itself is left untouched
    save = inregs;
    // Load the code TLB with insn_page, but point the page tables at
    // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
    // This will make the CPU trap on the insn_page instruction but the
    // hypervisor will see alt_insn_page.
    install_page(cr3, virt_to_phys(insn_page), insn_ram);
    invlpg(insn_ram);
    // Load code TLB
    // NOTE(review): this call runs the full insn_page sequence before %r8,
    // %r9 and %r10 have been set up by this function -- confirm that is
    // intended.
    asm volatile("call *%0" : : "r"(insn_ram));
    install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
    // Trap, let hypervisor emulate at alt_insn_page
    asm volatile(
    // NOTE(review): this line ends in "\n\r" while every other line uses
    // "\n\t" -- confirm the assembler accepts the stray carriage return.
    "mov %2, %%r8\n\r"
    // Load the test RFLAGS value
    "push 72+%[save]; popf\n\t"
    // Swap the test register values in; the caller's values are parked in save
    "xchg %%rax, 0+%[save]\n\t"
    "xchg %%rbx, 8+%[save] \n\t"
    "xchg %%rcx, 16+%[save] \n\t"
    "xchg %%rdx, 24+%[save] \n\t"
    "xchg %%rsi, 32+%[save] \n\t"
    "xchg %%rdi, 40+%[save] \n\t"
    "xchg %%r9, 48+%[save] \n\t" // %rsp in %r9
    "xchg %%r10, 56+%[save] \n\t" // %rbp in %r10

    "call *%1\n\t"

    // Swap back: save now holds the post-instruction values and the
    // registers get the caller's values again
    "xchg %%rax, 0+%[save] \n\t"
    "xchg %%rbx, 8+%[save] \n\t"
    "xchg %%rcx, 16+%[save] \n\t"
    "xchg %%rdx, 24+%[save] \n\t"
    "xchg %%rsi, 32+%[save] \n\t"
    "xchg %%rdi, 40+%[save] \n\t"
    "xchg %%r9, 48+%[save] \n\t" // %rsp in %r9
    "xchg %%r10, 56+%[save] \n\t" // %rbp in %r10
    /* Save RFLAGS in outregs*/
    "pushf \n\t"
    "pop 72+%[save] \n\t"
    // NOTE(review): save is read as well as written by the asm, but the
    // constraint is output-only ("=m") -- confirm whether "+m" is intended.
    : [save]"=m"(save)
    : "r"(insn_ram), "r"(mem)
    : "memory", "cc", "r9", "r10", "r8"
    );
    // Publish the register state observed after the instruction
    outregs = save;
}


/*
 * Check that an MMX store run via trap_emulator() raises exactly one #MF
 * when an x87 exception has been generated beforehand.
 *
 * mem, insn_page, alt_insn_page and insn_ram are forwarded unchanged to
 * trap_emulator().
 */
static void test_mmx_movq_mf(uint64_t *mem, uint8_t *insn_page,
    uint8_t *alt_insn_page, void *insn_ram)
{
    uint16_t fcw = 0;  // all exceptions unmasked
    uint8_t alt_insn[] = {0x0f, 0x7f, 0x00}; // movq %mm0, (%rax)

    // NOTE(review): the mask 6 covers CR0 bits 1 and 2; confirm that these
    // are the bits meant by the "TS, EM" label below.
    write_cr0(read_cr0() & ~6);  // TS, EM
    exceptions = 0;
    // Count #MF exceptions instead of treating them as fatal
    handle_exception(MF_VECTOR, advance_rip_by_3_and_note_exception);
    asm volatile("fninit; fldcw %0" : : "m"(fcw));
    asm volatile("fldz; fldz; fdivp"); // generate exception

    // Every member of inregs is zero here, so %rax is also 0 when the
    // instruction runs.
    // NOTE(review): confirm that a zero %rax/%rsp/%rbp is intended.
    inregs = (struct regs){ .rsp=0, .rbp=0 };
    trap_emulator(mem, insn_page, alt_insn_page, insn_ram,
alt_insn, 3);
    // exit MMX mode
    asm volatile("fnclex; emms");
    report("movq mmx generates #MF2", exceptions == 1);
    // Remove the #MF handler installed above
    handle_exception(MF_VECTOR, 0);
}

On Mon, Jun 10, 2013 at 1:13 AM, Gleb Natapov <gleb@redhat.com> wrote:
> On Mon, Jun 10, 2013 at 01:09:15AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> I have finished the infrastructure but after changing test_mmx_movq_mf
>> test case, it return error:
>> unhandled excecption 6
>> Return value from qemu: 15
>>
>> If I don't change %rsp and %rbp, it runs OK.
>> So I wonder if this test case is strictly reply on %rsp and %rbp?
>>
> I can't help without seeing the code.
>
>> On Mon, Jun 10, 2013 at 12:00 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Sun, Jun 09, 2013 at 11:23:26PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> On Sun, Jun 9, 2013 at 10:09 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Sun, Jun 09, 2013 at 09:22:27PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> On Sun, Jun 9, 2013 at 8:49 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > On Sun, Jun 09, 2013 at 08:44:32PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> >> > On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
>> >> >> >> >> Add a function trap_emulator to run an instruction in emulator.
>> >> >> >> >> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
>> >> >> >> >> cannot set in inregs), put instruction codec in alt_insn and call
>> >> >> >> >> func with alt_insn_length. Get results in outregs.
>> >> >> >> >>
>> >> >> >> > Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
>> >> >> >> >
>> >> >> >> > %rax because trapping instruction uses it? Use one that does not use
>> >> >> >> > register at all: MOV r/m32, imm32
>> >> >> >> I don't know why set %rax before call alt_insn_page can cause error. I
>> >> >> >> use "xchg %%rax, 0+%[save]" before "call *%1" and the %rcx is not set
>> >> >> >> correctly.
>> >> >> > We better find this out :)
>> >> >> I found that before calling alt_insn_page, address of "mem" is saved
>> >> >> to %rax, why?
>> >> > Because instruction that we use to trigger vmexit is mov %eax, (%rax) so
>> >> > MMOI address mem is loaded into %rax before jumping into it.
>> >> I think this is why changing %rax will cause error. If we use mov
>> >> %eax, (%rax) to trigger vmexit, and %rax is changed before calling
>> >> alt_insn_page, codes in alt_insn_page will not be executed and return
>> >> directly.
>> >> I changed the codes which trigger vmexit to "mov %eax, (%r8)" and set
>> >> "mem" to %r8 before calling alt_insn_page, it runs OK.
>> >>
>> > Just use an instruction that does not use registers at all. mov $1, addr
>> > where addr is immediate and encoded from mem parameter.
>> >
>> >> Besides, I also don't know if changed %rflags may cause some
>> >> unpredictable actions, so now we just treat it with no error :)
>> > If test sets rflags to a value that causes crashes this is a test bug,
>> > no need to prevent this from happening.
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-09 17:28                   ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-09 17:39                     ` Gleb Natapov
  0 siblings, 0 replies; 54+ messages in thread
From: Gleb Natapov @ 2013-06-09 17:39 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: kvm, Paolo Bonzini

On Mon, Jun 10, 2013 at 01:28:48AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> I have trouble to send the address of inregs into alt_insn_page, so I
> use r9 and r10 to carry %rsp and %rbp into it. And r8 is used to
> trigger vmexit.
> 
If you are going to use call you will need a stack. I said how not to use it
and how not to use a register for the trapping instruction, since you cannot
use r[0-7] registers because we will obviously want them to be part of
inregs/outregs.

> I paste the relevant functions as follows:
> 
> 
> static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>     uint8_t *alt_insn_page, void *insn_ram,
>     uint8_t *alt_insn, int alt_insn_length)
> {
>     ulong *cr3 = (ulong *)read_cr3();
>     int i;
>     static struct regs save;
> 
>     // Pad with RET instructions
>     memset(insn_page, 0x90, 4096);
>     memset(alt_insn_page, 0x90, 4096);
> 
>     // Place a trapping instruction in the page to trigger a VMEXIT
>     insn_page[0] = 0x49; //xchg   %rsp, %r9
>     insn_page[1] = 0x87;
>     insn_page[2] = 0xe1;
>     insn_page[3] = 0x49; //xchg   %rbp, %r10
>     insn_page[4] = 0x87;
>     insn_page[5] = 0xea;
>     insn_page[6] = 0x41; // mov %eax, (%r8)
>     insn_page[7] = 0x89;
>     insn_page[8] = 0x00; // ret
> 
>     // Place the instruction we want the hypervisor to see in the alternate page
>     for (i=0; i<alt_insn_length; i++)
>         alt_insn_page[i+6] = alt_insn[i];
>     i+=6;
>     insn_page[i++] = 0x49; //xchg   %rsp, %r9
>     insn_page[i++] = 0x87;
>     insn_page[i++] = 0xe1;
>     insn_page[i++] = 0x49; //xchg   %rbp, %r10
>     insn_page[i++] = 0x87;
>     insn_page[i++] = 0xea;
>     insn_page[i++] = 0xc3; // ret
>     save = inregs;
>     // Load the code TLB with insn_page, but point the page tables at
>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>     // This will make the CPU trap on the insn_page instruction but the
>     // hypervisor will see alt_insn_page.
>     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>     invlpg(insn_ram);
>     // Load code TLB
>     asm volatile("call *%0" : : "r"(insn_ram));
>     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>     // Trap, let hypervisor emulate at alt_insn_page
>     asm volatile(
>     "mov %2, %%r8\n\r"
>     "push 72+%[save]; popf\n\t"
>     "xchg %%rax, 0+%[save]\n\t"
>     "xchg %%rbx, 8+%[save] \n\t"
>     "xchg %%rcx, 16+%[save] \n\t"
>     "xchg %%rdx, 24+%[save] \n\t"
>     "xchg %%rsi, 32+%[save] \n\t"
>     "xchg %%rdi, 40+%[save] \n\t"
>     "xchg %%r9, 48+%[save] \n\t" // %rsp in %r9
>     "xchg %%r10, 56+%[save] \n\t" // %rbp in %r10
> 
>     "call *%1\n\t"
> 
>     "xchg %%rax, 0+%[save] \n\t"
>     "xchg %%rbx, 8+%[save] \n\t"
>     "xchg %%rcx, 16+%[save] \n\t"
>     "xchg %%rdx, 24+%[save] \n\t"
>     "xchg %%rsi, 32+%[save] \n\t"
>     "xchg %%rdi, 40+%[save] \n\t"
>     "xchg %%r9, 48+%[save] \n\t" // %rsp in %r9
>     "xchg %%r10, 56+%[save] \n\t" // %rbp in %r10
>     /* Save RFLAGS in outregs*/
>     "pushf \n\t"
>     "pop 72+%[save] \n\t"
>     : [save]"=m"(save)
>     : "r"(insn_ram), "r"(mem)
>     : "memory", "cc", "r9", "r10", "r8"
>     );
>     outregs = save;
> }
> 
> 
> static void test_mmx_movq_mf(uint64_t *mem, uint8_t *insn_page,
>     uint8_t *alt_insn_page, void *insn_ram)
> {
>     uint16_t fcw = 0;  // all exceptions unmasked
>     uint8_t alt_insn[] = {0x0f, 0x7f, 0x00}; // movq %mm0, (%rax)
> 
>     write_cr0(read_cr0() & ~6);  // TS, EM
>     exceptions = 0;
>     handle_exception(MF_VECTOR, advance_rip_by_3_and_note_exception);
>     asm volatile("fninit; fldcw %0" : : "m"(fcw));
>     asm volatile("fldz; fldz; fdivp"); // generate exception
> 
>     inregs = (struct regs){ .rsp=0, .rbp=0 };
>     trap_emulator(mem, insn_page, alt_insn_page, insn_ram,
> alt_insn, 3);
>     // exit MMX mode
>     asm volatile("fnclex; emms");
>     report("movq mmx generates #MF2", exceptions == 1);
>     handle_exception(MF_VECTOR, 0);
> }
> 
> On Mon, Jun 10, 2013 at 1:13 AM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Mon, Jun 10, 2013 at 01:09:15AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> I have finished the infrastructure but after changing test_mmx_movq_mf
> >> test case, it return error:
> >> unhandled excecption 6
> >> Return value from qemu: 15
> >>
> >> If I don't change %rsp and %rbp, it runs OK.
> >> So I wonder if this test case is strictly reply on %rsp and %rbp?
> >>
> > I can't help without seeing the code.
> >
> >> On Mon, Jun 10, 2013 at 12:00 AM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Sun, Jun 09, 2013 at 11:23:26PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> On Sun, Jun 9, 2013 at 10:09 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > On Sun, Jun 09, 2013 at 09:22:27PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> On Sun, Jun 9, 2013 at 8:49 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> >> > On Sun, Jun 09, 2013 at 08:44:32PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> >> On Sun, Jun 9, 2013 at 7:07 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> >> >> > On Fri, Jun 07, 2013 at 10:31:38AM +0800, Arthur Chunqi Li wrote:
> >> >> >> >> >> Add a function trap_emulator to run an instruction in emulator.
> >> >> >> >> >> Set inregs first (%rax, %rsp, %rbp, %rflags have special usage and
> >> >> >> >> >> cannot set in inregs), put instruction codec in alt_insn and call
> >> >> >> >> >> func with alt_insn_length. Get results in outregs.
> >> >> >> >> >>
> >> >> >> >> > Why %rax, %rsp, %rbp, %rflags cannot be set in inregs?
> >> >> >> >> >
> >> >> >> >> > %rax because trapping instruction uses it? Use one that does not use
> >> >> >> >> > register at all: MOV r/m32, imm32
> >> >> >> >> I don't know why set %rax before call alt_insn_page can cause error. I
> >> >> >> >> use "xchg %%rax, 0+%[save]" before "call *%1" and the %rcx is not set
> >> >> >> >> correctly.
> >> >> >> > We better find this out :)
> >> >> >> I found that before calling alt_insn_page, address of "mem" is saved
> >> >> >> to %rax, why?
> >> >> > Because instruction that we use to trigger vmexit is mov %eax, (%rax) so
> >> >> > MMOI address mem is loaded into %rax before jumping into it.
> >> >> I think this is why changing %rax will cause error. If we use mov
> >> >> %eax, (%rax) to trigger vmexit, and %rax is changed before calling
> >> >> alt_insn_page, codes in alt_insn_page will not be executed and return
> >> >> directly.
> >> >> I changed the codes which trigger vmexit to "mov %eax, (%r8)" and set
> >> >> "mem" to %r8 before calling alt_insn_page, it runs OK.
> >> >>
> >> > Just use an instruction that does not use registers at all. mov $1, addr
> >> > where addr is immediate and encoded from mem parameter.
> >> >
> >> >> Besides, I also don't know if changed %rflags may cause some
> >> >> unpredictable actions, so now we just treat it with no error :)
> >> > If test sets rflags to a value that causes crashes this is a test bug,
> >> > no need to prevent this from happening.
> >> >
> >> > --
> >> >                         Gleb.
> >>
> >>
> >>
> >> --
> >> Arthur Chunqi Li
> >> Department of Computer Science
> >> School of EECS
> >> Peking University
> >> Beijing, China
> >
> > --
> >                         Gleb.
> 
> 
> 
> -- 
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
@ 2013-06-10 13:38 Arthur Chunqi Li
  2013-06-10 17:36 ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: Arthur Chunqi Li @ 2013-06-10 13:38 UTC (permalink / raw)
  To: kvm; +Cc: gleb, pbonzini, Arthur Chunqi Li

Add a function trap_emulator to run an instruction in emulator.
Set inregs first (%rax is invalid because it is used as return
address), put instruction codec in alt_insn and call func with
alt_insn_length. Get results in outregs.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 x86/emulator.c |  106 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)

diff --git a/x86/emulator.c b/x86/emulator.c
index 96576e5..a1bd92e 100644
--- a/x86/emulator.c
+++ b/x86/emulator.c
@@ -11,6 +11,13 @@ int fails, tests;
 
 static int exceptions;
 
+struct regs {
+	u64 rax, rbx, rcx, rdx;
+	u64 rsi, rdi, rsp, rbp;
+	u64 rip, rflags;
+};
+static struct regs inregs, outregs;
+
 void report(const char *name, int result)
 {
 	++tests;
@@ -685,6 +692,105 @@ static void test_shld_shrd(u32 *mem)
     report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
 }
 
+static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
+			     uint8_t *alt_insn_page, void *insn_ram,
+			     uint8_t* alt_insn, int alt_insn_length, int reserve_stack)
+{
+	ulong *cr3 = (ulong *)read_cr3();
+	int i;
+	static struct regs save;
+
+	// Pad with RET instructions
+	memset(insn_page, 0x90, 4096);
+	memset(alt_insn_page, 0x90, 4096);
+
+	asm volatile(
+		"movw $1, %0\n\t"
+		: : "m"(mem)
+		: "memory"
+		);
+	// Place a trapping instruction in the page to trigger a VMEXIT
+	insn_page[0] = 0xc3; // ret
+	if (!reserve_stack)
+	{
+		insn_page[1] = 0x49; // xchg   %rsp,%r9
+		insn_page[2] = 0x87;
+		insn_page[3] = 0xe1;
+		insn_page[4] = 0x49; // xchg   %rbp,%r10
+		insn_page[5] = 0x87;
+		insn_page[6] = 0xea;
+	}
+	//in  (%dx),%al, may change in the future
+	insn_page[7] = 0xec;
+
+	// Place the instruction we want the hypervisor to see in the alternate page
+	for (i=7; i<alt_insn_length+7; i++)
+		alt_insn_page[i] = alt_insn[i-7];
+
+	if (!reserve_stack)
+	{
+		insn_page[i+0] = 0x49; // xchg   %rsp,%r9
+		insn_page[i+1] = 0x87;
+		insn_page[i+2] = 0xe1;
+		insn_page[i+3] = 0x49; // xchg   %rbp,%r10
+		insn_page[i+4] = 0x87;
+		insn_page[i+5] = 0xea;
+	}
+	else
+	{
+		insn_page[i+0] = 0x49; // mov   %rsp,%r9
+		insn_page[i+1] = 0x89;
+		insn_page[i+2] = 0xe1;
+		insn_page[i+3] = 0x49; // mov   %rbp,%r10
+		insn_page[i+4] = 0x89;
+		insn_page[i+5] = 0xea;
+	}
+	insn_page[i+6] = 0xc3; // ret
+
+	save = inregs;
+	
+	// Load the code TLB with insn_page, but point the page tables at
+	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
+	// This will make the CPU trap on the insn_page instruction but the
+	// hypervisor will see alt_insn_page.
+	install_page(cr3, virt_to_phys(insn_page), insn_ram);
+	invlpg(insn_ram);
+	// Load code TLB
+	asm volatile("call *%0" : : "r"(insn_ram));
+	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
+	// Trap, let hypervisor emulate at alt_insn_page
+	asm volatile(
+		"push 72+%[save]; popf\n\t"
+		"mov %2, %%r8\n\t"
+		"xchg %%rax, 0+%[save] \n\t"
+		"xchg %%rbx, 8+%[save] \n\t"
+		"xchg %%rcx, 16+%[save] \n\t"
+		"xchg %%rdx, 24+%[save] \n\t"
+		"xchg %%rsi, 32+%[save] \n\t"
+		"xchg %%rdi, 40+%[save] \n\t"
+		"xchg %%r9, 48+%[save]\n\t"
+		"xchg %%r10, 56+%[save]\n\t"
+
+		"call *%1\n\t"
+
+		"xchg %%rax, 0+%[save] \n\t"
+		"xchg %%rbx, 8+%[save] \n\t"
+		"xchg %%rcx, 16+%[save] \n\t"
+		"xchg %%rdx, 24+%[save] \n\t"
+		"xchg %%rsi, 32+%[save] \n\t"
+		"xchg %%rdi, 40+%[save] \n\t"
+		"xchg %%r9, 48+%[save] \n\t"
+		"xchg %%r10, 56+%[save] \n\t"
+		/* Save RFLAGS in outregs*/
+		"pushf \n\t"
+		"pop 72+%[save] \n\t"
+		: [save]"+m"(save)
+		: "r"(insn_ram+1), "r"(mem)
+		: "memory", "cc", "r8", "r9", "r10"
+		);
+	outregs = save;
+}
+
 static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
 {
     ++exceptions;
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-10 13:38 Arthur Chunqi Li
@ 2013-06-10 17:36 ` Gleb Natapov
  0 siblings, 0 replies; 54+ messages in thread
From: Gleb Natapov @ 2013-06-10 17:36 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, pbonzini

On Mon, Jun 10, 2013 at 09:38:32PM +0800, Arthur Chunqi Li wrote:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax is invalid because it is used as return
> address), put instruction codec in alt_insn and call func with
> alt_insn_length. Get results in outregs.
> 
That's far from what I meant :( As I said before, inregs/outregs should
contain r[0-7] too, so you cannot use them as tmp vars to save %rbp/%rsp.
My idea is that the code to save/restore registers (all the xchg
instructions) should be part of the code in insn_page/alt_insn_page.
Instead of the call in the middle, just put the trapping instruction there on
insn_page (in  (%dx),%al is a good one), padded with nops to the max
instruction length. alt_insn_page will have the instruction we want to
test at the same offset. This way you can call insn_page freely, since
the stack registers during entry and return are unchanged; all the registers
are saved and restored by the code on insn_page itself.

> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/emulator.c |  106 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 106 insertions(+)
> 
> diff --git a/x86/emulator.c b/x86/emulator.c
> index 96576e5..a1bd92e 100644
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -11,6 +11,13 @@ int fails, tests;
>  
>  static int exceptions;
>  
> +struct regs {
> +	u64 rax, rbx, rcx, rdx;
> +	u64 rsi, rdi, rsp, rbp;
> +	u64 rip, rflags;
> +};
> +static struct regs inregs, outregs;
> +
>  void report(const char *name, int result)
>  {
>  	++tests;
> @@ -685,6 +692,105 @@ static void test_shld_shrd(u32 *mem)
>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>  }
>  
> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> +			     uint8_t *alt_insn_page, void *insn_ram,
> +			     uint8_t* alt_insn, int alt_insn_length, int reserve_stack)
> +{
> +	ulong *cr3 = (ulong *)read_cr3();
> +	int i;
> +	static struct regs save;
> +
> +	// Pad with RET instructions
> +	memset(insn_page, 0x90, 4096);
> +	memset(alt_insn_page, 0x90, 4096);
> +
> +	asm volatile(
> +		"movw $1, %0\n\t"
> +		: : "m"(mem)
> +		: "memory"
> +		);
> +	// Place a trapping instruction in the page to trigger a VMEXIT
> +	insn_page[0] = 0xc3; // ret
> +	if (!reserve_stack)
> +	{
> +		insn_page[1] = 0x49; // xchg   %rsp,%r9
> +		insn_page[2] = 0x87;
> +		insn_page[3] = 0xe1;
> +		insn_page[4] = 0x49; // xchg   %rbp,%r10
> +		insn_page[5] = 0x87;
> +		insn_page[6] = 0xea;
> +	}
> +	//in  (%dx),%al, may change in the future
> +	insn_page[7] = 0xec;
> +
> +	// Place the instruction we want the hypervisor to see in the alternate page
> +	for (i=7; i<alt_insn_length+7; i++)
> +		alt_insn_page[i] = alt_insn[i-7];
> +
> +	if (!reserve_stack)
> +	{
> +		insn_page[i+0] = 0x49; // xchg   %rsp,%r9
> +		insn_page[i+1] = 0x87;
> +		insn_page[i+2] = 0xe1;
> +		insn_page[i+3] = 0x49; // xchg   %rbp,%r10
> +		insn_page[i+4] = 0x87;
> +		insn_page[i+5] = 0xea;
> +	}
> +	else
> +	{
> +		insn_page[i+0] = 0x49; // mov   %rsp,%r9
> +		insn_page[i+1] = 0x89;
> +		insn_page[i+2] = 0xe1;
> +		insn_page[i+3] = 0x49; // mov   %rbp,%r10
> +		insn_page[i+4] = 0x89;
> +		insn_page[i+5] = 0xea;
> +	}
> +	insn_page[i+6] = 0xc3; // ret
> +
> +	save = inregs;
> +	
> +	// Load the code TLB with insn_page, but point the page tables at
> +	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> +	// This will make the CPU trap on the insn_page instruction but the
> +	// hypervisor will see alt_insn_page.
> +	install_page(cr3, virt_to_phys(insn_page), insn_ram);
> +	invlpg(insn_ram);
> +	// Load code TLB
> +	asm volatile("call *%0" : : "r"(insn_ram));
> +	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> +	// Trap, let hypervisor emulate at alt_insn_page
> +	asm volatile(
> +		"push 72+%[save]; popf\n\t"
> +		"mov %2, %%r8\n\t"
> +		"xchg %%rax, 0+%[save] \n\t"
> +		"xchg %%rbx, 8+%[save] \n\t"
> +		"xchg %%rcx, 16+%[save] \n\t"
> +		"xchg %%rdx, 24+%[save] \n\t"
> +		"xchg %%rsi, 32+%[save] \n\t"
> +		"xchg %%rdi, 40+%[save] \n\t"
> +		"xchg %%r9, 48+%[save]\n\t"
> +		"xchg %%r10, 56+%[save]\n\t"
> +
> +		"call *%1\n\t"
> +
> +		"xchg %%rax, 0+%[save] \n\t"
> +		"xchg %%rbx, 8+%[save] \n\t"
> +		"xchg %%rcx, 16+%[save] \n\t"
> +		"xchg %%rdx, 24+%[save] \n\t"
> +		"xchg %%rsi, 32+%[save] \n\t"
> +		"xchg %%rdi, 40+%[save] \n\t"
> +		"xchg %%r9, 48+%[save] \n\t"
> +		"xchg %%r10, 56+%[save] \n\t"
> +		/* Save RFLAGS in outregs*/
> +		"pushf \n\t"
> +		"pop 72+%[save] \n\t"
> +		: [save]"+m"(save)
> +		: "r"(insn_ram+1), "r"(mem)
> +		: "memory", "cc", "r8", "r9", "r10"
> +		);
> +	outregs = save;
> +}
> +
>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>  {
>      ++exceptions;
> -- 
> 1.7.9.5

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-06 15:24 [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator Arthur Chunqi Li
  2013-06-06 15:24 ` [PATCH 2/2] kvm-unit-tests: Change two cases to use trap_emulator Arthur Chunqi Li
  2013-06-07  2:14 ` [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator 李春奇 <Arthur Chunqi Li>
@ 2013-06-12 20:50 ` Paolo Bonzini
  2013-06-13  4:50   ` 李春奇 <Arthur Chunqi Li>
  2 siblings, 1 reply; 54+ messages in thread
From: Paolo Bonzini @ 2013-06-12 20:50 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, gleb

Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax is invalid because it is used as return
> address), put instruction codec in alt_insn and call func with
> alt_insn_length. Get results in outregs.
> 
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 81 insertions(+)
> 
> diff --git a/x86/emulator.c b/x86/emulator.c
> index 96576e5..8ab9904 100644
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -11,6 +11,14 @@ int fails, tests;
>  
>  static int exceptions;
>  
> +struct regs {
> +	u64 rax, rbx, rcx, rdx;
> +	u64 rsi, rdi, rsp, rbp;
> +	u64 rip, rflags;
> +};
> +
> +static struct regs inregs, outregs;
> +
>  void report(const char *name, int result)
>  {
>  	++tests;
> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>  }
>  
> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> +			     uint8_t *alt_insn_page, void *insn_ram,
> +			     uint8_t *alt_insn, int alt_insn_length)
> +{
> +	ulong *cr3 = (ulong *)read_cr3();
> +	int i;
> +
> +	// Pad with RET instructions
> +	memset(insn_page, 0xc3, 4096);
> +	memset(alt_insn_page, 0xc3, 4096);
> +
> +	// Place a trapping instruction in the page to trigger a VMEXIT
> +	insn_page[0] = 0x89; // mov %eax, (%rax)
> +	insn_page[1] = 0x00;
> +	insn_page[2] = 0x90; // nop
> +	insn_page[3] = 0xc3; // ret
> +
> +	// Place the instruction we want the hypervisor to see in the alternate page
> +	for (i=0; i<alt_insn_length; i++)
> +		alt_insn_page[i] = alt_insn[i];
> +
> +	// Save general registers
> +	asm volatile(
> +		"push %rax\n\r"
> +		"push %rbx\n\r"
> +		"push %rcx\n\r"
> +		"push %rdx\n\r"
> +		"push %rsi\n\r"
> +		"push %rdi\n\r"
> +		);

This will not work if GCC is using rsp-relative addresses to access
local variables.  You need to use mov instructions to load from inregs,
and put the push/pop sequences inside the "main" asm that does the "call
*%1".

Paolo

> +	// Load the code TLB with insn_page, but point the page tables at
> +	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> +	// This will make the CPU trap on the insn_page instruction but the
> +	// hypervisor will see alt_insn_page.
> +	install_page(cr3, virt_to_phys(insn_page), insn_ram);
> +	invlpg(insn_ram);
> +	// Load code TLB
> +	asm volatile("call *%0" : : "r"(insn_ram + 3));
> +	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> +	// Trap, let hypervisor emulate at alt_insn_page
> +	asm volatile(
> +		"call *%1\n\r"
> +
> +		"mov %%rax, 0+%[outregs] \n\t"
> +		"mov %%rbx, 8+%[outregs] \n\t"
> +		"mov %%rcx, 16+%[outregs] \n\t"
> +		"mov %%rdx, 24+%[outregs] \n\t"
> +		"mov %%rsi, 32+%[outregs] \n\t"
> +		"mov %%rdi, 40+%[outregs] \n\t"
> +		"mov %%rsp,48+ %[outregs] \n\t"
> +		"mov %%rbp, 56+%[outregs] \n\t"
> +
> +		/* Save RFLAGS in outregs*/
> +		"pushf \n\t"
> +		"popq 72+%[outregs] \n\t"
> +		: [outregs]"+m"(outregs)
> +		: "r"(insn_ram),
> +			"a"(mem), "b"(inregs.rbx),
> +			"c"(inregs.rcx), "d"(inregs.rdx),
> +			"S"(inregs.rsi), "D"(inregs.rdi)
> +		: "memory", "cc"
> +		);
> +	// Restore general registers
> +	asm volatile(
> +		"pop %rax\n\r"
> +		"pop %rbx\n\r"
> +		"pop %rcx\n\r"
> +		"pop %rdx\n\r"
> +		"pop %rsi\n\r"
> +		"pop %rdi\n\r"
> +		);
> +}
> +
>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>  {
>      ++exceptions;
> 


^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 2/2] kvm-unit-tests: Change two cases to use trap_emulator
  2013-06-06 15:24 ` [PATCH 2/2] kvm-unit-tests: Change two cases to use trap_emulator Arthur Chunqi Li
@ 2013-06-12 20:51   ` Paolo Bonzini
  0 siblings, 0 replies; 54+ messages in thread
From: Paolo Bonzini @ 2013-06-12 20:51 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, gleb

Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
> Change two functions (test_mmx_movq_mf and test_movabs) using
> unified trap_emulator.
> 
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/emulator.c |   66 ++++++++++++--------------------------------------------
>  1 file changed, 14 insertions(+), 52 deletions(-)
> 
> diff --git a/x86/emulator.c b/x86/emulator.c
> index 8ab9904..fa8993f 100644
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -776,72 +776,34 @@ static void test_mmx_movq_mf(uint64_t *mem, uint8_t *insn_page,
>  			     uint8_t *alt_insn_page, void *insn_ram)
>  {
>      uint16_t fcw = 0;  // all exceptions unmasked
> -    ulong *cr3 = (ulong *)read_cr3();
> +    uint8_t alt_insn[] = {0x0f, 0x7f, 0x00}; // movq %mm0, (%rax)
>  
>      write_cr0(read_cr0() & ~6);  // TS, EM
> -    // Place a trapping instruction in the page to trigger a VMEXIT
> -    insn_page[0] = 0x89; // mov %eax, (%rax)
> -    insn_page[1] = 0x00;
> -    insn_page[2] = 0x90; // nop
> -    insn_page[3] = 0xc3; // ret
> -    // Place the instruction we want the hypervisor to see in the alternate page
> -    alt_insn_page[0] = 0x0f; // movq %mm0, (%rax)
> -    alt_insn_page[1] = 0x7f;
> -    alt_insn_page[2] = 0x00;
> -    alt_insn_page[3] = 0xc3; // ret
> -
>      exceptions = 0;
>      handle_exception(MF_VECTOR, advance_rip_by_3_and_note_exception);
> -
> -    // Load the code TLB with insn_page, but point the page tables at
> -    // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> -    // This will make the CPU trap on the insn_page instruction but the
> -    // hypervisor will see alt_insn_page.
> -    install_page(cr3, virt_to_phys(insn_page), insn_ram);
>      asm volatile("fninit; fldcw %0" : : "m"(fcw));
>      asm volatile("fldz; fldz; fdivp"); // generate exception
> -    invlpg(insn_ram);
> -    // Load code TLB
> -    asm volatile("call *%0" : : "r"(insn_ram + 3));
> -    install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> -    // Trap, let hypervisor emulate at alt_insn_page
> -    asm volatile("call *%0" : : "r"(insn_ram), "a"(mem));
> +
> +    inregs = (struct regs){ 0 };
> +    trap_emulator(mem, insn_page, alt_insn_page, insn_ram, 
> +				alt_insn, 3);
>      // exit MMX mode
>      asm volatile("fnclex; emms");
> -    report("movq mmx generates #MF", exceptions == 1);
> +    report("movq mmx generates #MF2", exceptions == 1);

Extra hunk that is not needed?  Otherwise it looks good.

Thanks,

Paolo

>      handle_exception(MF_VECTOR, 0);
>  }
>  
>  static void test_movabs(uint64_t *mem, uint8_t *insn_page,
>  		       uint8_t *alt_insn_page, void *insn_ram)
>  {
> -    uint64_t val = 0;
> -    ulong *cr3 = (ulong *)read_cr3();
> -
> -    // Pad with RET instructions
> -    memset(insn_page, 0xc3, 4096);
> -    memset(alt_insn_page, 0xc3, 4096);
> -    // Place a trapping instruction in the page to trigger a VMEXIT
> -    insn_page[0] = 0x89; // mov %eax, (%rax)
> -    insn_page[1] = 0x00;
> -    // Place the instruction we want the hypervisor to see in the alternate
> -    // page. A buggy hypervisor will fetch a 32-bit immediate and return
> -    // 0xffffffffc3c3c3c3.
> -    alt_insn_page[0] = 0x48; // mov $0xc3c3c3c3c3c3c3c3, %rcx
> -    alt_insn_page[1] = 0xb9;
> -
> -    // Load the code TLB with insn_page, but point the page tables at
> -    // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> -    // This will make the CPU trap on the insn_page instruction but the
> -    // hypervisor will see alt_insn_page.
> -    install_page(cr3, virt_to_phys(insn_page), insn_ram);
> -    // Load code TLB
> -    invlpg(insn_ram);
> -    asm volatile("call *%0" : : "r"(insn_ram + 3));
> -    // Trap, let hypervisor emulate at alt_insn_page
> -    install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> -    asm volatile("call *%1" : "=c"(val) : "r"(insn_ram), "a"(mem), "c"(0));
> -    report("64-bit mov imm", val == 0xc3c3c3c3c3c3c3c3);
> +    // mov $0xc3c3c3c3c3c3c3c3, %rcx
> +    uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
> +					0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
> +    inregs = (struct regs){ .rcx = 0 };
> +    
> +    trap_emulator(mem, insn_page, alt_insn_page, insn_ram,
> +				alt_insn, 10);
> +    report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
>  }
>  
>  static void test_crosspage_mmio(volatile uint8_t *mem)
> 


^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-12 20:50 ` Paolo Bonzini
@ 2013-06-13  4:50   ` 李春奇 <Arthur Chunqi Li>
  2013-06-13  9:30     ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-13  4:50 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: kvm, Gleb Natapov

On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>> Add a function trap_emulator to run an instruction in emulator.
>> Set inregs first (%rax is invalid because it is used as return
>> address), put instruction codec in alt_insn and call func with
>> alt_insn_length. Get results in outregs.
>>
>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> ---
>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 81 insertions(+)
>>
>> diff --git a/x86/emulator.c b/x86/emulator.c
>> index 96576e5..8ab9904 100644
>> --- a/x86/emulator.c
>> +++ b/x86/emulator.c
>> @@ -11,6 +11,14 @@ int fails, tests;
>>
>>  static int exceptions;
>>
>> +struct regs {
>> +     u64 rax, rbx, rcx, rdx;
>> +     u64 rsi, rdi, rsp, rbp;
>> +     u64 rip, rflags;
>> +};
>> +
>> +static struct regs inregs, outregs;
>> +
>>  void report(const char *name, int result)
>>  {
>>       ++tests;
>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>>  }
>>
>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> +                          uint8_t *alt_insn_page, void *insn_ram,
>> +                          uint8_t *alt_insn, int alt_insn_length)
>> +{
>> +     ulong *cr3 = (ulong *)read_cr3();
>> +     int i;
>> +
>> +     // Pad with RET instructions
>> +     memset(insn_page, 0xc3, 4096);
>> +     memset(alt_insn_page, 0xc3, 4096);
>> +
>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> +     insn_page[1] = 0x00;
>> +     insn_page[2] = 0x90; // nop
>> +     insn_page[3] = 0xc3; // ret
>> +
>> +     // Place the instruction we want the hypervisor to see in the alternate page
>> +     for (i=0; i<alt_insn_length; i++)
>> +             alt_insn_page[i] = alt_insn[i];
>> +
>> +     // Save general registers
>> +     asm volatile(
>> +             "push %rax\n\r"
>> +             "push %rbx\n\r"
>> +             "push %rcx\n\r"
>> +             "push %rdx\n\r"
>> +             "push %rsi\n\r"
>> +             "push %rdi\n\r"
>> +             );
>
> This will not work if GCC is using rsp-relative addresses to access
> local variables.  You need to use mov instructions to load from inregs,
> and put the push/pop sequences inside the "main" asm that does the "call
> *%1".
Is there any way to make gcc use absolute addresses to access variables?
I moved the variable "save" to global scope and used "xchg %%rax, 0+%[save]",
but the addressing for "save" seems to be wrong.

Arthur
>
> Paolo
>
>> +     // Load the code TLB with insn_page, but point the page tables at
>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> +     // This will make the CPU trap on the insn_page instruction but the
>> +     // hypervisor will see alt_insn_page.
>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> +     invlpg(insn_ram);
>> +     // Load code TLB
>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> +     // Trap, let hypervisor emulate at alt_insn_page
>> +     asm volatile(
>> +             "call *%1\n\r"
>> +
>> +             "mov %%rax, 0+%[outregs] \n\t"
>> +             "mov %%rbx, 8+%[outregs] \n\t"
>> +             "mov %%rcx, 16+%[outregs] \n\t"
>> +             "mov %%rdx, 24+%[outregs] \n\t"
>> +             "mov %%rsi, 32+%[outregs] \n\t"
>> +             "mov %%rdi, 40+%[outregs] \n\t"
>> +             "mov %%rsp,48+ %[outregs] \n\t"
>> +             "mov %%rbp, 56+%[outregs] \n\t"
>> +
>> +             /* Save RFLAGS in outregs*/
>> +             "pushf \n\t"
>> +             "popq 72+%[outregs] \n\t"
>> +             : [outregs]"+m"(outregs)
>> +             : "r"(insn_ram),
>> +                     "a"(mem), "b"(inregs.rbx),
>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>> +             : "memory", "cc"
>> +             );
>> +     // Restore general registers
>> +     asm volatile(
>> +             "pop %rax\n\r"
>> +             "pop %rbx\n\r"
>> +             "pop %rcx\n\r"
>> +             "pop %rdx\n\r"
>> +             "pop %rsi\n\r"
>> +             "pop %rdi\n\r"
>> +             );
>> +}
>> +
>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>>  {
>>      ++exceptions;
>>
>



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-13  4:50   ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-13  9:30     ` 李春奇 <Arthur Chunqi Li>
  2013-06-13 13:12       ` Paolo Bonzini
  2013-06-18 12:45       ` Gleb Natapov
  0 siblings, 2 replies; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-13  9:30 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: kvm, Gleb Natapov

Hi Gleb,
I have been trying to solve these problems over the past few days and have
run into many difficulties. You want to save all the general registers when
calling insn_page, so the registers should be saved to (save) in insn_page.
Because all the instructions are generated outside and then copied to
insn_page, and the instructions generated outside are RIP-relative, the
RIP-relative references to (save) will point to the wrong address once the
code runs inside insn_page.

I have tried moving (save) into insn_page. But while insn_page is being
executed, data in it can only be read; any instruction like "xchg
%%rax, 0+%[save]" may cause an error, because at that point reads are served
from the TLB but a write would cause an inconsistency.

Another way is to disable RIP-relative code, but this failed when I used
"-mcmodel=large -fno-pic": the binary still uses RIP-relative addressing.
Is there any way to disable RIP-relative code entirely? Besides, this
feature may be specific to newer C compilers, so it may not be a
good solution.

If we don't set %rsp and %rbp when executing the emulator code, we can
just use "push/pop" to save the other general registers.

If you have any better solutions, please let me know.

Thanks,
Arthur

On Thu, Jun 13, 2013 at 12:50 PM, 李春奇 <Arthur Chunqi Li>
<yzt356@gmail.com> wrote:
> On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>>> Add a function trap_emulator to run an instruction in emulator.
>>> Set inregs first (%rax is invalid because it is used as return
>>> address), put instruction codec in alt_insn and call func with
>>> alt_insn_length. Get results in outregs.
>>>
>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>>> ---
>>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 81 insertions(+)
>>>
>>> diff --git a/x86/emulator.c b/x86/emulator.c
>>> index 96576e5..8ab9904 100644
>>> --- a/x86/emulator.c
>>> +++ b/x86/emulator.c
>>> @@ -11,6 +11,14 @@ int fails, tests;
>>>
>>>  static int exceptions;
>>>
>>> +struct regs {
>>> +     u64 rax, rbx, rcx, rdx;
>>> +     u64 rsi, rdi, rsp, rbp;
>>> +     u64 rip, rflags;
>>> +};
>>> +
>>> +static struct regs inregs, outregs;
>>> +
>>>  void report(const char *name, int result)
>>>  {
>>>       ++tests;
>>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>>>  }
>>>
>>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>>> +                          uint8_t *alt_insn_page, void *insn_ram,
>>> +                          uint8_t *alt_insn, int alt_insn_length)
>>> +{
>>> +     ulong *cr3 = (ulong *)read_cr3();
>>> +     int i;
>>> +
>>> +     // Pad with RET instructions
>>> +     memset(insn_page, 0xc3, 4096);
>>> +     memset(alt_insn_page, 0xc3, 4096);
>>> +
>>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>>> +     insn_page[1] = 0x00;
>>> +     insn_page[2] = 0x90; // nop
>>> +     insn_page[3] = 0xc3; // ret
>>> +
>>> +     // Place the instruction we want the hypervisor to see in the alternate page
>>> +     for (i=0; i<alt_insn_length; i++)
>>> +             alt_insn_page[i] = alt_insn[i];
>>> +
>>> +     // Save general registers
>>> +     asm volatile(
>>> +             "push %rax\n\r"
>>> +             "push %rbx\n\r"
>>> +             "push %rcx\n\r"
>>> +             "push %rdx\n\r"
>>> +             "push %rsi\n\r"
>>> +             "push %rdi\n\r"
>>> +             );
>>
>> This will not work if GCC is using rsp-relative addresses to access
>> local variables.  You need to use mov instructions to load from inregs,
>> and put the push/pop sequences inside the "main" asm that does the "call
>> *%1".
> Is there any way to let gcc use absolute address to access variables?
> I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
> and it seems that addressing for "save" is wrong.
>
> Arthur
>>
>> Paolo
>>
>>> +     // Load the code TLB with insn_page, but point the page tables at
>>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>>> +     // This will make the CPU trap on the insn_page instruction but the
>>> +     // hypervisor will see alt_insn_page.
>>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>>> +     invlpg(insn_ram);
>>> +     // Load code TLB
>>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>>> +     // Trap, let hypervisor emulate at alt_insn_page
>>> +     asm volatile(
>>> +             "call *%1\n\r"
>>> +
>>> +             "mov %%rax, 0+%[outregs] \n\t"
>>> +             "mov %%rbx, 8+%[outregs] \n\t"
>>> +             "mov %%rcx, 16+%[outregs] \n\t"
>>> +             "mov %%rdx, 24+%[outregs] \n\t"
>>> +             "mov %%rsi, 32+%[outregs] \n\t"
>>> +             "mov %%rdi, 40+%[outregs] \n\t"
>>> +             "mov %%rsp,48+ %[outregs] \n\t"
>>> +             "mov %%rbp, 56+%[outregs] \n\t"
>>> +
>>> +             /* Save RFLAGS in outregs*/
>>> +             "pushf \n\t"
>>> +             "popq 72+%[outregs] \n\t"
>>> +             : [outregs]"+m"(outregs)
>>> +             : "r"(insn_ram),
>>> +                     "a"(mem), "b"(inregs.rbx),
>>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>>> +             : "memory", "cc"
>>> +             );
>>> +     // Restore general registers
>>> +     asm volatile(
>>> +             "pop %rax\n\r"
>>> +             "pop %rbx\n\r"
>>> +             "pop %rcx\n\r"
>>> +             "pop %rdx\n\r"
>>> +             "pop %rsi\n\r"
>>> +             "pop %rdi\n\r"
>>> +             );
>>> +}
>>> +
>>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>>>  {
>>>      ++exceptions;
>>>
>>
>
>
>
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-13  9:30     ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-13 13:12       ` Paolo Bonzini
  2013-06-18 12:45       ` Gleb Natapov
  1 sibling, 0 replies; 54+ messages in thread
From: Paolo Bonzini @ 2013-06-13 13:12 UTC (permalink / raw)
  To: "李春奇 <Arthur Chunqi Li>"
  Cc: kvm, Gleb Natapov

Il 13/06/2013 05:30, 李春奇 <Arthur Chunqi Li> ha scritto:
> Hi Gleb,
> I'm trying to solve these problems in the past days and meet many
> difficulties. You want to save all the general registers in calling
> insn_page, so registers should be saved to (save) in insn_page.
> Because all the instructions should be generated outside and copy to
> insn_page, and the instructions generated outside is RIP-relative, so
> inside insn_page (save) will be wrong pointed with RIP-relative code.
> 
> I have tried to move (save) into insn_page. But when calling
> insn_page, data in it can only be read and any instructions like "xchg
> %%rax, 0+%[save]" may cause error, because at this time read is from
> TLB but write will cause inconsistent.
> 
> Another way is disabling RIP-relative code, but I failed when using
> "-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
> Is there any way to totally disable RIP-relative code? Besides, using
> this feature may specified to some newer C compiler. This may not be a
> good solution.
> 
> If we don't set %rsp and %rbp when executing emulator code, we can
> just use “push/pop" to save other general registers.

%rbp should not be a problem; on the other hand, it's okay not to include
%rsp in the registers struct (and assume insn_page/alt_insn_page do not
touch it).  Interestingly, both VMX and SVM put the guest RSP in the VM
control information so that the switch occurs atomically with the start
of the guest.

Paolo

> If you have any better solutions, please let me know.

> Thanks,
> Arthur
> 
> On Thu, Jun 13, 2013 at 12:50 PM, 李春奇 <Arthur Chunqi Li>
> <yzt356@gmail.com> wrote:
>> On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>>>> Add a function trap_emulator to run an instruction in emulator.
>>>> Set inregs first (%rax is invalid because it is used as return
>>>> address), put instruction codec in alt_insn and call func with
>>>> alt_insn_length. Get results in outregs.
>>>>
>>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>>>> ---
>>>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  1 file changed, 81 insertions(+)
>>>>
>>>> diff --git a/x86/emulator.c b/x86/emulator.c
>>>> index 96576e5..8ab9904 100644
>>>> --- a/x86/emulator.c
>>>> +++ b/x86/emulator.c
>>>> @@ -11,6 +11,14 @@ int fails, tests;
>>>>
>>>>  static int exceptions;
>>>>
>>>> +struct regs {
>>>> +     u64 rax, rbx, rcx, rdx;
>>>> +     u64 rsi, rdi, rsp, rbp;
>>>> +     u64 rip, rflags;
>>>> +};
>>>> +
>>>> +static struct regs inregs, outregs;
>>>> +
>>>>  void report(const char *name, int result)
>>>>  {
>>>>       ++tests;
>>>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>>>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>>>>  }
>>>>
>>>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>>>> +                          uint8_t *alt_insn_page, void *insn_ram,
>>>> +                          uint8_t *alt_insn, int alt_insn_length)
>>>> +{
>>>> +     ulong *cr3 = (ulong *)read_cr3();
>>>> +     int i;
>>>> +
>>>> +     // Pad with RET instructions
>>>> +     memset(insn_page, 0xc3, 4096);
>>>> +     memset(alt_insn_page, 0xc3, 4096);
>>>> +
>>>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>>>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>>>> +     insn_page[1] = 0x00;
>>>> +     insn_page[2] = 0x90; // nop
>>>> +     insn_page[3] = 0xc3; // ret
>>>> +
>>>> +     // Place the instruction we want the hypervisor to see in the alternate page
>>>> +     for (i=0; i<alt_insn_length; i++)
>>>> +             alt_insn_page[i] = alt_insn[i];
>>>> +
>>>> +     // Save general registers
>>>> +     asm volatile(
>>>> +             "push %rax\n\r"
>>>> +             "push %rbx\n\r"
>>>> +             "push %rcx\n\r"
>>>> +             "push %rdx\n\r"
>>>> +             "push %rsi\n\r"
>>>> +             "push %rdi\n\r"
>>>> +             );
>>>
>>> This will not work if GCC is using rsp-relative addresses to access
>>> local variables.  You need to use mov instructions to load from inregs,
>>> and put the push/pop sequences inside the "main" asm that does the "call
>>> *%1".
>> Is there any way to let gcc use absolute address to access variables?
>> I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
>> and it seems that addressing for "save" is wrong.
>>
>> Arthur
>>>
>>> Paolo
>>>
>>>> +     // Load the code TLB with insn_page, but point the page tables at
>>>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>>>> +     // This will make the CPU trap on the insn_page instruction but the
>>>> +     // hypervisor will see alt_insn_page.
>>>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>>>> +     invlpg(insn_ram);
>>>> +     // Load code TLB
>>>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>>>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>>>> +     // Trap, let hypervisor emulate at alt_insn_page
>>>> +     asm volatile(
>>>> +             "call *%1\n\r"
>>>> +
>>>> +             "mov %%rax, 0+%[outregs] \n\t"
>>>> +             "mov %%rbx, 8+%[outregs] \n\t"
>>>> +             "mov %%rcx, 16+%[outregs] \n\t"
>>>> +             "mov %%rdx, 24+%[outregs] \n\t"
>>>> +             "mov %%rsi, 32+%[outregs] \n\t"
>>>> +             "mov %%rdi, 40+%[outregs] \n\t"
>>>> +             "mov %%rsp,48+ %[outregs] \n\t"
>>>> +             "mov %%rbp, 56+%[outregs] \n\t"
>>>> +
>>>> +             /* Save RFLAGS in outregs*/
>>>> +             "pushf \n\t"
>>>> +             "popq 72+%[outregs] \n\t"
>>>> +             : [outregs]"+m"(outregs)
>>>> +             : "r"(insn_ram),
>>>> +                     "a"(mem), "b"(inregs.rbx),
>>>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>>>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>>>> +             : "memory", "cc"
>>>> +             );
>>>> +     // Restore general registers
>>>> +     asm volatile(
>>>> +             "pop %rax\n\r"
>>>> +             "pop %rbx\n\r"
>>>> +             "pop %rcx\n\r"
>>>> +             "pop %rdx\n\r"
>>>> +             "pop %rsi\n\r"
>>>> +             "pop %rdi\n\r"
>>>> +             );
>>>> +}
>>>> +
>>>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>>>>  {
>>>>      ++exceptions;
>>>>
>>>
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
> 
> 
> 


^ permalink raw reply	[flat|nested] 54+ messages in thread

* [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
@ 2013-06-13 15:16 Arthur Chunqi Li
  0 siblings, 0 replies; 54+ messages in thread
From: Arthur Chunqi Li @ 2013-06-13 15:16 UTC (permalink / raw)
  To: kvm; +Cc: gleb, pbonzini, jan.kiszka, Arthur Chunqi Li

Add a function trap_emulator to run an instruction in the emulator.
Set inregs first (%rax is invalid because it is used as the return
address), put the instruction encoding in alt_insn and call the function
with alt_insn_length. The results are returned in outregs.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 x86/emulator.c |  132 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/x86/emulator.c b/x86/emulator.c
index 96576e5..4981bfb 100644
--- a/x86/emulator.c
+++ b/x86/emulator.c
@@ -11,6 +11,16 @@ int fails, tests;
 
 static int exceptions;
 
+struct regs {
+	u64 rax, rbx, rcx, rdx;
+	u64 rsi, rdi, rsp, rbp;
+	u64 r8, r9, r10, r11;
+	u64 r12, r13, r14, r15;
+	u64 rip, rflags;
+};
+static struct regs inregs, outregs;
+extern struct regs save;
+
 void report(const char *name, int result)
 {
 	++tests;
@@ -685,6 +695,128 @@ static void test_shld_shrd(u32 *mem)
     report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
 }
 
+extern u8 insn_start[], insn_end[];
+extern u8 insn_emulate_start[], insn_emulate_end[];
+
+static void mk_insn_page(uint8_t *insn_page, uint8_t *alt_insn_page,
+				uint8_t *alt_insn, int alt_insn_length)
+{
+	int i, emul_offset;
+	for (i=1; i<insn_emulate_end - insn_emulate_start; i++)
+		insn_emulate_start[i] = 0x90; // nop
+	for (i=0; i<insn_end - insn_start; i++)
+		insn_page[i] = insn_start[i];
+	emul_offset = insn_emulate_start - insn_start;
+	for (i=0; i<alt_insn_length; i++)
+		alt_insn_page[i+emul_offset] = alt_insn[i];
+
+	asm volatile(
+		".pushsection .text.insn, \"ax\" \n\t"
+		"insn_start:\n\t"
+		"ret\n\t"
+
+		"push %%rax; push %%rbx\n\t"
+		"push %%rcx; push %%rdx\n\t"
+		"push %%rsi; push %%rdi\n\t"
+		"push %%rbp\n\t"
+		"push %%r8; push %%r9\n\t"
+		"push %%r10; push %%r11\n\t"
+		"push %%r12; push %%r13\n\t"
+		"push %%r14; push %%r15\n\t"
+		"pushf\n\t"
+
+		"push 136+%[save] \n\t"
+		"popf \n\t"
+		"mov 0+%[save], %%rax \n\t"
+		"mov 8+%[save], %%rbx \n\t"
+		"mov 16+%[save], %%rcx \n\t"
+		"mov 24+%[save], %%rdx \n\t"
+		"mov 32+%[save], %%rsi \n\t"
+		"mov 40+%[save], %%rdi \n\t"
+		"mov 56+%[save], %%rbp \n\t"
+		"mov 64+%[save], %%r8 \n\t"
+		"mov 72+%[save], %%r9 \n\t"
+		"mov 80+%[save], %%r10  \n\t"
+		"mov 88+%[save], %%r11 \n\t"
+		"mov 96+%[save], %%r12 \n\t"
+		"mov 104+%[save], %%r13 \n\t"
+		"mov 112+%[save], %%r14 \n\t"
+		"mov 120+%[save], %%r15 \n\t"
+
+		"insn_emulate_start:\n\t"
+		"in  (%%dx),%%al\n\t"
+		". = . + 31\n\t"
+		"insn_emulate_end:\n\t"
+
+		"pushf \n\t"
+		"pop 136+%[save] \n\t"
+		"mov %%rax, 0+%[save] \n\t"
+		"mov %%rbx, 8+%[save] \n\t"
+		"mov %%rcx, 16+%[save] \n\t"
+		"mov %%rdx, 24+%[save] \n\t"
+		"mov %%rsi, 32+%[save] \n\t"
+		"mov %%rdi, 40+%[save] \n\t"
+		"mov %%rbp, 56+%[save] \n\t"
+		"mov %%r8, 64+%[save]\n\t"
+		"mov %%r9, 72+%[save]\n\t"
+		"mov %%r10, 80+%[save]\n\t"
+		"mov %%r11, 88+%[save]\n\t"
+		"mov %%r12, 96+%[save]\n\t"
+		"mov %%r13, 104+%[save]\n\t"
+		"mov %%r14, 112+%[save]\n\t"
+		"mov %%r15, 120+%[save]\n\t"
+
+		"popf\n\t"
+		"pop %%r15; pop %%r14 \n\t"
+		"pop %%r13; pop %%r12 \n\t"
+		"pop %%r11; pop %%r10 \n\t"
+		"pop %%r9; pop %%r8 \n\t"
+		"pop %%rbp \n\t"
+		"pop %%rdi; pop %%rsi \n\t"
+		"pop %%rdx; pop %%rcx \n\t"
+		"pop %%rbx; pop %%rax \n\t"
+
+		"ret\n\t"
+		
+		"save:\n\t"
+		". = . + 256\n\t"
+		"insn_end:\n\t"
+		".popsection\n\t"
+		: [save]"=m"(save)
+		: : "memory", "cc"
+		);
+}
+
+static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
+			     uint8_t *alt_insn_page, void *insn_ram,
+			     uint8_t* alt_insn, int alt_insn_length, int reserve_stack)
+{
+	ulong *cr3 = (ulong *)read_cr3();
+	extern u8 insn_start[];
+	int save_offset = (u8 *)(&save) - insn_start;
+	
+	memset(insn_page, 0x90, 4096);
+	memset(alt_insn_page, 0x90, 4096);
+	
+	save = inregs;
+	mk_insn_page(insn_page, alt_insn_page,
+		alt_insn, alt_insn_length);
+	
+	// Load the code TLB with insn_page, but point the page tables at
+	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
+	// This will make the CPU trap on the insn_page instruction but the
+	// hypervisor will see alt_insn_page.
+	install_page(cr3, virt_to_phys(insn_page), insn_ram);
+	invlpg(insn_ram);
+	// Load code TLB
+	asm volatile("call *%0" : : "r"(insn_ram));
+	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
+	// Trap, let hypervisor emulate at alt_insn_page
+	asm volatile("call *%0": : "r"(insn_ram+1));
+
+	outregs = *((struct regs *)(&alt_insn_page[save_offset]));
+}
+
 static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
 {
     ++exceptions;
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-13  9:30     ` 李春奇 <Arthur Chunqi Li>
  2013-06-13 13:12       ` Paolo Bonzini
@ 2013-06-18 12:45       ` Gleb Natapov
  2013-06-18 13:40         ` 李春奇 <Arthur Chunqi Li>
  2013-06-18 14:28         ` 李春奇 <Arthur Chunqi Li>
  1 sibling, 2 replies; 54+ messages in thread
From: Gleb Natapov @ 2013-06-18 12:45 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: Paolo Bonzini, kvm

On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> Hi Gleb,
> I'm trying to solve these problems in the past days and meet many
> difficulties. You want to save all the general registers in calling
> insn_page, so registers should be saved to (save) in insn_page.
> Because all the instructions should be generated outside and copy to
> insn_page, and the instructions generated outside is RIP-relative, so
> inside insn_page (save) will be wrong pointed with RIP-relative code.
> 
They do not have to be generated outside. You can write code into
insn_page directly. Something like this outside of any functions:

asm(".align 4096\n\t"
    ".global insn_page\n\t"
    ".global insn_page_end\n\t"
    ".global test_insn\n\t"
    ".global test_insn_end\n\t"
    "insn_page:"
    "mov %%rax, outregs \n\t"
    ...
    "test_insn:\n\t" 
    "in (%ds), %al\n\t"
    ". = . + 31\n\t"
    "test_insn_end:\n\t"
    "mov outregs, %%rax\n\t"
    ...
    "ret\n\t"
    ".align 4096\n\t"
    "insn_page_end:\n\t");

Now you copy that into alt_insn_page, put the instruction you want to test
at the test_insn offset, and remap alt_insn_page to the "insn_page" virtual address.

> I have tried to move (save) into insn_page. But when calling
> insn_page, data in it can only be read and any instructions like "xchg
> %%rax, 0+%[save]" may cause error, because at this time read is from
> TLB but write will cause inconsistent.
> 
> Another way is disabling RIP-relative code, but I failed when using
> "-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
> Is there any way to totally disable RIP-relative code? Besides, using
> this feature may specified to some newer C compiler. This may not be a
> good solution.
> 
> If we don't set %rsp and %rbp when executing emulator code, we can
> just use “push/pop" to save other general registers.
> 
> If you have any better solutions, please let me know.
> 
> Thanks,
> Arthur
> 
> On Thu, Jun 13, 2013 at 12:50 PM, 李春奇 <Arthur Chunqi Li>
> <yzt356@gmail.com> wrote:
> > On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> >> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
> >>> Add a function trap_emulator to run an instruction in emulator.
> >>> Set inregs first (%rax is invalid because it is used as return
> >>> address), put instruction codec in alt_insn and call func with
> >>> alt_insn_length. Get results in outregs.
> >>>
> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> >>> ---
> >>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>  1 file changed, 81 insertions(+)
> >>>
> >>> diff --git a/x86/emulator.c b/x86/emulator.c
> >>> index 96576e5..8ab9904 100644
> >>> --- a/x86/emulator.c
> >>> +++ b/x86/emulator.c
> >>> @@ -11,6 +11,14 @@ int fails, tests;
> >>>
> >>>  static int exceptions;
> >>>
> >>> +struct regs {
> >>> +     u64 rax, rbx, rcx, rdx;
> >>> +     u64 rsi, rdi, rsp, rbp;
> >>> +     u64 rip, rflags;
> >>> +};
> >>> +
> >>> +static struct regs inregs, outregs;
> >>> +
> >>>  void report(const char *name, int result)
> >>>  {
> >>>       ++tests;
> >>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
> >>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
> >>>  }
> >>>
> >>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> >>> +                          uint8_t *alt_insn_page, void *insn_ram,
> >>> +                          uint8_t *alt_insn, int alt_insn_length)
> >>> +{
> >>> +     ulong *cr3 = (ulong *)read_cr3();
> >>> +     int i;
> >>> +
> >>> +     // Pad with RET instructions
> >>> +     memset(insn_page, 0xc3, 4096);
> >>> +     memset(alt_insn_page, 0xc3, 4096);
> >>> +
> >>> +     // Place a trapping instruction in the page to trigger a VMEXIT
> >>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
> >>> +     insn_page[1] = 0x00;
> >>> +     insn_page[2] = 0x90; // nop
> >>> +     insn_page[3] = 0xc3; // ret
> >>> +
> >>> +     // Place the instruction we want the hypervisor to see in the alternate page
> >>> +     for (i=0; i<alt_insn_length; i++)
> >>> +             alt_insn_page[i] = alt_insn[i];
> >>> +
> >>> +     // Save general registers
> >>> +     asm volatile(
> >>> +             "push %rax\n\r"
> >>> +             "push %rbx\n\r"
> >>> +             "push %rcx\n\r"
> >>> +             "push %rdx\n\r"
> >>> +             "push %rsi\n\r"
> >>> +             "push %rdi\n\r"
> >>> +             );
> >>
> >> This will not work if GCC is using rsp-relative addresses to access
> >> local variables.  You need to use mov instructions to load from inregs,
> >> and put the push/pop sequences inside the "main" asm that does the "call
> >> *%1".
> > Is there any way to let gcc use absolute address to access variables?
> > I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
> > and it seems that addressing for "save" is wrong.
> >
> > Arthur
> >>
> >> Paolo
> >>
> >>> +     // Load the code TLB with insn_page, but point the page tables at
> >>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >>> +     // This will make the CPU trap on the insn_page instruction but the
> >>> +     // hypervisor will see alt_insn_page.
> >>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
> >>> +     invlpg(insn_ram);
> >>> +     // Load code TLB
> >>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
> >>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> >>> +     // Trap, let hypervisor emulate at alt_insn_page
> >>> +     asm volatile(
> >>> +             "call *%1\n\r"
> >>> +
> >>> +             "mov %%rax, 0+%[outregs] \n\t"
> >>> +             "mov %%rbx, 8+%[outregs] \n\t"
> >>> +             "mov %%rcx, 16+%[outregs] \n\t"
> >>> +             "mov %%rdx, 24+%[outregs] \n\t"
> >>> +             "mov %%rsi, 32+%[outregs] \n\t"
> >>> +             "mov %%rdi, 40+%[outregs] \n\t"
> >>> +             "mov %%rsp,48+ %[outregs] \n\t"
> >>> +             "mov %%rbp, 56+%[outregs] \n\t"
> >>> +
> >>> +             /* Save RFLAGS in outregs*/
> >>> +             "pushf \n\t"
> >>> +             "popq 72+%[outregs] \n\t"
> >>> +             : [outregs]"+m"(outregs)
> >>> +             : "r"(insn_ram),
> >>> +                     "a"(mem), "b"(inregs.rbx),
> >>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
> >>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
> >>> +             : "memory", "cc"
> >>> +             );
> >>> +     // Restore general registers
> >>> +     asm volatile(
> >>> +             "pop %rax\n\r"
> >>> +             "pop %rbx\n\r"
> >>> +             "pop %rcx\n\r"
> >>> +             "pop %rdx\n\r"
> >>> +             "pop %rsi\n\r"
> >>> +             "pop %rdi\n\r"
> >>> +             );
> >>> +}
> >>> +
> >>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
> >>>  {
> >>>      ++exceptions;
> >>>
> >>
> >
> >
> >
> > --
> > Arthur Chunqi Li
> > Department of Computer Science
> > School of EECS
> > Peking University
> > Beijing, China
> 
> 
> 
> -- 
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-18 12:45       ` Gleb Natapov
@ 2013-06-18 13:40         ` 李春奇 <Arthur Chunqi Li>
  2013-06-18 14:28         ` 李春奇 <Arthur Chunqi Li>
  1 sibling, 0 replies; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-18 13:40 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Paolo Bonzini, kvm

On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> Hi Gleb,
>> I'm trying to solve these problems in the past days and meet many
>> difficulties. You want to save all the general registers in calling
>> insn_page, so registers should be saved to (save) in insn_page.
>> Because all the instructions should be generated outside and copy to
>> insn_page, and the instructions generated outside is RIP-relative, so
>> inside insn_page (save) will be wrong pointed with RIP-relative code.
>>
> They do not have to be generated outside. You can write code into
> insn_page directly. Something like this outside of any functions:
>
> asm(".align 4096\n\t"
>     ".global insn_page\n\t"
>     ".global insn_page_end\n\t"
>     ".global test_insn\n\t"
>     ".global test_insn_end\n\t"
>     "insn_page:"
>     "mov %%rax, outregs \n\t"
>     ...
>     "test_insn:\n\t"
>     "in (%ds), %al\n\t"
>     ". = . + 31\n\t"
>     "test_insn_end:\n\t"
>     "mov outregs, %%rax\n\t"
>     ...
>     "ret\n\t"
>     ".align 4096\n\t"
>     "insn_page_end:\n\t");
>
> Now you copy that into alt_insn_page, put instruction you want to test
> into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
Which function can be used to remap alt_insn_page to the "insn_page"
virtual address?

Arthur
>
>> I have tried to move (save) into insn_page. But when calling
>> insn_page, data in it can only be read and any instructions like "xchg
>> %%rax, 0+%[save]" may cause error, because at this time read is from
>> TLB but write will cause inconsistent.
>>
>> Another way is disabling RIP-relative code, but I failed when using
>> "-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
>> Is there any way to totally disable RIP-relative code? Besides, using
>> this feature may specified to some newer C compiler. This may not be a
>> good solution.
>>
>> If we don't set %rsp and %rbp when executing emulator code, we can
>> just use “push/pop" to save other general registers.
>>
>> If you have any better solutions, please let me know.
>>
>> Thanks,
>> Arthur
>>
>> On Thu, Jun 13, 2013 at 12:50 PM, 李春奇 <Arthur Chunqi Li>
>> <yzt356@gmail.com> wrote:
>> > On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> >> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>> >>> Add a function trap_emulator to run an instruction in emulator.
>> >>> Set inregs first (%rax is invalid because it is used as return
>> >>> address), put instruction codec in alt_insn and call func with
>> >>> alt_insn_length. Get results in outregs.
>> >>>
>> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> >>> ---
>> >>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> >>>  1 file changed, 81 insertions(+)
>> >>>
>> >>> diff --git a/x86/emulator.c b/x86/emulator.c
>> >>> index 96576e5..8ab9904 100644
>> >>> --- a/x86/emulator.c
>> >>> +++ b/x86/emulator.c
>> >>> @@ -11,6 +11,14 @@ int fails, tests;
>> >>>
>> >>>  static int exceptions;
>> >>>
>> >>> +struct regs {
>> >>> +     u64 rax, rbx, rcx, rdx;
>> >>> +     u64 rsi, rdi, rsp, rbp;
>> >>> +     u64 rip, rflags;
>> >>> +};
>> >>> +
>> >>> +static struct regs inregs, outregs;
>> >>> +
>> >>>  void report(const char *name, int result)
>> >>>  {
>> >>>       ++tests;
>> >>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>> >>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>> >>>  }
>> >>>
>> >>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> >>> +                          uint8_t *alt_insn_page, void *insn_ram,
>> >>> +                          uint8_t *alt_insn, int alt_insn_length)
>> >>> +{
>> >>> +     ulong *cr3 = (ulong *)read_cr3();
>> >>> +     int i;
>> >>> +
>> >>> +     // Pad with RET instructions
>> >>> +     memset(insn_page, 0xc3, 4096);
>> >>> +     memset(alt_insn_page, 0xc3, 4096);
>> >>> +
>> >>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> >>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> >>> +     insn_page[1] = 0x00;
>> >>> +     insn_page[2] = 0x90; // nop
>> >>> +     insn_page[3] = 0xc3; // ret
>> >>> +
>> >>> +     // Place the instruction we want the hypervisor to see in the alternate page
>> >>> +     for (i=0; i<alt_insn_length; i++)
>> >>> +             alt_insn_page[i] = alt_insn[i];
>> >>> +
>> >>> +     // Save general registers
>> >>> +     asm volatile(
>> >>> +             "push %rax\n\r"
>> >>> +             "push %rbx\n\r"
>> >>> +             "push %rcx\n\r"
>> >>> +             "push %rdx\n\r"
>> >>> +             "push %rsi\n\r"
>> >>> +             "push %rdi\n\r"
>> >>> +             );
>> >>
>> >> This will not work if GCC is using rsp-relative addresses to access
>> >> local variables.  You need to use mov instructions to load from inregs,
>> >> and put the push/pop sequences inside the "main" asm that does the "call
>> >> *%1".
>> > Is there any way to let gcc use absolute address to access variables?
>> > I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
>> > and it seems that addressing for "save" is wrong.
>> >
>> > Arthur
>> >>
>> >> Paolo
>> >>
>> >>> +     // Load the code TLB with insn_page, but point the page tables at
>> >>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >>> +     // This will make the CPU trap on the insn_page instruction but the
>> >>> +     // hypervisor will see alt_insn_page.
>> >>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> >>> +     invlpg(insn_ram);
>> >>> +     // Load code TLB
>> >>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> >>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> >>> +     // Trap, let hypervisor emulate at alt_insn_page
>> >>> +     asm volatile(
>> >>> +             "call *%1\n\r"
>> >>> +
>> >>> +             "mov %%rax, 0+%[outregs] \n\t"
>> >>> +             "mov %%rbx, 8+%[outregs] \n\t"
>> >>> +             "mov %%rcx, 16+%[outregs] \n\t"
>> >>> +             "mov %%rdx, 24+%[outregs] \n\t"
>> >>> +             "mov %%rsi, 32+%[outregs] \n\t"
>> >>> +             "mov %%rdi, 40+%[outregs] \n\t"
>> >>> +             "mov %%rsp,48+ %[outregs] \n\t"
>> >>> +             "mov %%rbp, 56+%[outregs] \n\t"
>> >>> +
>> >>> +             /* Save RFLAGS in outregs*/
>> >>> +             "pushf \n\t"
>> >>> +             "popq 72+%[outregs] \n\t"
>> >>> +             : [outregs]"+m"(outregs)
>> >>> +             : "r"(insn_ram),
>> >>> +                     "a"(mem), "b"(inregs.rbx),
>> >>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>> >>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>> >>> +             : "memory", "cc"
>> >>> +             );
>> >>> +     // Restore general registers
>> >>> +     asm volatile(
>> >>> +             "pop %rax\n\r"
>> >>> +             "pop %rbx\n\r"
>> >>> +             "pop %rcx\n\r"
>> >>> +             "pop %rdx\n\r"
>> >>> +             "pop %rsi\n\r"
>> >>> +             "pop %rdi\n\r"
>> >>> +             );
>> >>> +}
>> >>> +
>> >>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>> >>>  {
>> >>>      ++exceptions;
>> >>>
>> >>
>> >
>> >
>> >
>> > --
>> > Arthur Chunqi Li
>> > Department of Computer Science
>> > School of EECS
>> > Peking University
>> > Beijing, China
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-18 12:45       ` Gleb Natapov
  2013-06-18 13:40         ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-18 14:28         ` 李春奇 <Arthur Chunqi Li>
  2013-06-18 15:47           ` Gleb Natapov
  1 sibling, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-18 14:28 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Paolo Bonzini, kvm

On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> Hi Gleb,
>> I'm trying to solve these problems in the past days and meet many
>> difficulties. You want to save all the general registers in calling
>> insn_page, so registers should be saved to (save) in insn_page.
>> Because all the instructions should be generated outside and copy to
>> insn_page, and the instructions generated outside is RIP-relative, so
>> inside insn_page (save) will be wrong pointed with RIP-relative code.
>>
> They do not have to be generated outside. You can write code into
> insn_page directly. Something like this outside of any functions:
>
> asm(".align 4096\n\t"
>     ".global insn_page\n\t"
>     ".global insn_page_end\n\t"
>     ".global test_insn\n\t"
>     ".global test_insn_end\n\t"
>     "insn_page:"
>     "mov %%rax, outregs \n\t"
>     ...
>     "test_insn:\n\t"
>     "in (%ds), %al\n\t"
>     ". = . + 31\n\t"
>     "test_insn_end:\n\t"
>     "mov outregs, %%rax\n\t"
>     ...
>     "ret\n\t"
>     ".align 4096\n\t"
>     "insn_page_end:\n\t");
>
> Now you copy that into alt_insn_page, put instruction you want to test
> into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
I used the following code:

invlpg((void *)virt_to_phys(insn_page));
asm volatile("call *%0" : : "r"(insn_page));
install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
asm volatile("call *%0": : "r"(insn_page+1));

But it seems that alt_insn_page is not remapped to insn_page. Here
insn_page and alt_insn_page are both declared statically with
"asm(...)".

Arthur
>
>> I have tried to move (save) into insn_page. But when calling
>> insn_page, data in it can only be read and any instructions like "xchg
>> %%rax, 0+%[save]" may cause error, because at this time read is from
>> TLB but write will cause inconsistent.
>>
>> Another way is disabling RIP-relative code, but I failed when using
>> "-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
>> Is there any way to totally disable RIP-relative code? Besides, using
>> this feature may specified to some newer C compiler. This may not be a
>> good solution.
>>
>> If we don't set %rsp and %rbp when executing emulator code, we can
>> just use “push/pop" to save other general registers.
>>
>> If you have any better solutions, please let me know.
>>
>> Thanks,
>> Arthur
>>
>> On Thu, Jun 13, 2013 at 12:50 PM, 李春奇 <Arthur Chunqi Li>
>> <yzt356@gmail.com> wrote:
>> > On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> >> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>> >>> Add a function trap_emulator to run an instruction in emulator.
>> >>> Set inregs first (%rax is invalid because it is used as return
>> >>> address), put instruction codec in alt_insn and call func with
>> >>> alt_insn_length. Get results in outregs.
>> >>>
>> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> >>> ---
>> >>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> >>>  1 file changed, 81 insertions(+)
>> >>>
>> >>> diff --git a/x86/emulator.c b/x86/emulator.c
>> >>> index 96576e5..8ab9904 100644
>> >>> --- a/x86/emulator.c
>> >>> +++ b/x86/emulator.c
>> >>> @@ -11,6 +11,14 @@ int fails, tests;
>> >>>
>> >>>  static int exceptions;
>> >>>
>> >>> +struct regs {
>> >>> +     u64 rax, rbx, rcx, rdx;
>> >>> +     u64 rsi, rdi, rsp, rbp;
>> >>> +     u64 rip, rflags;
>> >>> +};
>> >>> +
>> >>> +static struct regs inregs, outregs;
>> >>> +
>> >>>  void report(const char *name, int result)
>> >>>  {
>> >>>       ++tests;
>> >>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>> >>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>> >>>  }
>> >>>
>> >>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> >>> +                          uint8_t *alt_insn_page, void *insn_ram,
>> >>> +                          uint8_t *alt_insn, int alt_insn_length)
>> >>> +{
>> >>> +     ulong *cr3 = (ulong *)read_cr3();
>> >>> +     int i;
>> >>> +
>> >>> +     // Pad with RET instructions
>> >>> +     memset(insn_page, 0xc3, 4096);
>> >>> +     memset(alt_insn_page, 0xc3, 4096);
>> >>> +
>> >>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> >>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> >>> +     insn_page[1] = 0x00;
>> >>> +     insn_page[2] = 0x90; // nop
>> >>> +     insn_page[3] = 0xc3; // ret
>> >>> +
>> >>> +     // Place the instruction we want the hypervisor to see in the alternate page
>> >>> +     for (i=0; i<alt_insn_length; i++)
>> >>> +             alt_insn_page[i] = alt_insn[i];
>> >>> +
>> >>> +     // Save general registers
>> >>> +     asm volatile(
>> >>> +             "push %rax\n\r"
>> >>> +             "push %rbx\n\r"
>> >>> +             "push %rcx\n\r"
>> >>> +             "push %rdx\n\r"
>> >>> +             "push %rsi\n\r"
>> >>> +             "push %rdi\n\r"
>> >>> +             );
>> >>
>> >> This will not work if GCC is using rsp-relative addresses to access
>> >> local variables.  You need to use mov instructions to load from inregs,
>> >> and put the push/pop sequences inside the "main" asm that does the "call
>> >> *%1".
>> > Is there any way to let gcc use absolute address to access variables?
>> > I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
>> > and it seems that addressing for "save" is wrong.
>> >
>> > Arthur
>> >>
>> >> Paolo
>> >>
>> >>> +     // Load the code TLB with insn_page, but point the page tables at
>> >>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >>> +     // This will make the CPU trap on the insn_page instruction but the
>> >>> +     // hypervisor will see alt_insn_page.
>> >>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> >>> +     invlpg(insn_ram);
>> >>> +     // Load code TLB
>> >>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> >>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> >>> +     // Trap, let hypervisor emulate at alt_insn_page
>> >>> +     asm volatile(
>> >>> +             "call *%1\n\r"
>> >>> +
>> >>> +             "mov %%rax, 0+%[outregs] \n\t"
>> >>> +             "mov %%rbx, 8+%[outregs] \n\t"
>> >>> +             "mov %%rcx, 16+%[outregs] \n\t"
>> >>> +             "mov %%rdx, 24+%[outregs] \n\t"
>> >>> +             "mov %%rsi, 32+%[outregs] \n\t"
>> >>> +             "mov %%rdi, 40+%[outregs] \n\t"
>> >>> +             "mov %%rsp,48+ %[outregs] \n\t"
>> >>> +             "mov %%rbp, 56+%[outregs] \n\t"
>> >>> +
>> >>> +             /* Save RFLAGS in outregs*/
>> >>> +             "pushf \n\t"
>> >>> +             "popq 72+%[outregs] \n\t"
>> >>> +             : [outregs]"+m"(outregs)
>> >>> +             : "r"(insn_ram),
>> >>> +                     "a"(mem), "b"(inregs.rbx),
>> >>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>> >>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>> >>> +             : "memory", "cc"
>> >>> +             );
>> >>> +     // Restore general registers
>> >>> +     asm volatile(
>> >>> +             "pop %rax\n\r"
>> >>> +             "pop %rbx\n\r"
>> >>> +             "pop %rcx\n\r"
>> >>> +             "pop %rdx\n\r"
>> >>> +             "pop %rsi\n\r"
>> >>> +             "pop %rdi\n\r"
>> >>> +             );
>> >>> +}
>> >>> +
>> >>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>> >>>  {
>> >>>      ++exceptions;
>> >>>
>> >>
>> >
>> >
>> >
>> > --
>> > Arthur Chunqi Li
>> > Department of Computer Science
>> > School of EECS
>> > Peking University
>> > Beijing, China
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-18 14:28         ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-18 15:47           ` Gleb Natapov
  2013-06-18 15:56             ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-18 15:47 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>
  Cc: Paolo Bonzini, kvm

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=utf-8, Size: 8765 bytes --]

On Tue, Jun 18, 2013 at 10:28:59PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> Hi Gleb,
> >> I'm trying to solve these problems in the past days and meet many
> >> difficulties. You want to save all the general registers in calling
> >> insn_page, so registers should be saved to (save) in insn_page.
> >> Because all the instructions should be generated outside and copy to
> >> insn_page, and the instructions generated outside is RIP-relative, so
> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
> >>
> > They do not have to be generated outside. You can write code into
> > insn_page directly. Something like this outside of any functions:
> >
> > asm(".align 4096\n\t"
> >     ".global insn_page\n\t"
> >     ".global insn_page_end\n\t"
> >     ".global test_insn\n\t"
> >     ".global test_insn_end\n\t"
> >     "insn_page:"
> >     "mov %%rax, outregs \n\t"
> >     ...
> >     "test_insn:\n\t"
> >     "in (%ds), %al\n\t"
> >     ". = . + 31\n\t"
> >     "test_insn_end:\n\t"
> >     "mov outregs, %%rax\n\t"
> >     ...
> >     "ret\n\t"
> >     ".align 4096\n\t"
> >     "insn_page_end:\n\t");
> >
> > Now you copy that into alt_insn_page, put instruction you want to test
> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
> I used such codes:
> 
> invlpg((void *)virt_to_phys(insn_page));
virt_to_phys?

> asm volatile("call *%0" : : "r"(insn_page));
> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> asm volatile("call *%0": : "r"(insn_page+1));
+1?

> 
> But it seems that alt_insn_page are not remapped to insn_page. Here
> insn_page and alt_insn_page are all declared statically with
> "asm(...)".
> 
> Arthur
> >
> >> I have tried to move (save) into insn_page. But when calling
> >> insn_page, data in it can only be read and any instructions like "xchg
> >> %%rax, 0+%[save]" may cause error, because at this time read is from
> >> TLB but write will cause inconsistent.
> >>
> >> Another way is disabling RIP-relative code, but I failed when using
> >> "-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
> >> Is there any way to totally disable RIP-relative code? Besides, using
> >> this feature may specified to some newer C compiler. This may not be a
> >> good solution.
> >>
> >> If we don't set %rsp and %rbp when executing emulator code, we can
> >> just use “push/pop" to save other general registers.
> >>
> >> If you have any better solutions, please let me know.
> >>
> >> Thanks,
> >> Arthur
> >>
> >> On Thu, Jun 13, 2013 at 12:50 PM, 李春奇 <Arthur Chunqi Li>
> >> <yzt356@gmail.com> wrote:
> >> > On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> >> >> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
> >> >>> Add a function trap_emulator to run an instruction in emulator.
> >> >>> Set inregs first (%rax is invalid because it is used as return
> >> >>> address), put instruction codec in alt_insn and call func with
> >> >>> alt_insn_length. Get results in outregs.
> >> >>>
> >> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> >> >>> ---
> >> >>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >> >>>  1 file changed, 81 insertions(+)
> >> >>>
> >> >>> diff --git a/x86/emulator.c b/x86/emulator.c
> >> >>> index 96576e5..8ab9904 100644
> >> >>> --- a/x86/emulator.c
> >> >>> +++ b/x86/emulator.c
> >> >>> @@ -11,6 +11,14 @@ int fails, tests;
> >> >>>
> >> >>>  static int exceptions;
> >> >>>
> >> >>> +struct regs {
> >> >>> +     u64 rax, rbx, rcx, rdx;
> >> >>> +     u64 rsi, rdi, rsp, rbp;
> >> >>> +     u64 rip, rflags;
> >> >>> +};
> >> >>> +
> >> >>> +static struct regs inregs, outregs;
> >> >>> +
> >> >>>  void report(const char *name, int result)
> >> >>>  {
> >> >>>       ++tests;
> >> >>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
> >> >>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
> >> >>>  }
> >> >>>
> >> >>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
> >> >>> +                          uint8_t *alt_insn_page, void *insn_ram,
> >> >>> +                          uint8_t *alt_insn, int alt_insn_length)
> >> >>> +{
> >> >>> +     ulong *cr3 = (ulong *)read_cr3();
> >> >>> +     int i;
> >> >>> +
> >> >>> +     // Pad with RET instructions
> >> >>> +     memset(insn_page, 0xc3, 4096);
> >> >>> +     memset(alt_insn_page, 0xc3, 4096);
> >> >>> +
> >> >>> +     // Place a trapping instruction in the page to trigger a VMEXIT
> >> >>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
> >> >>> +     insn_page[1] = 0x00;
> >> >>> +     insn_page[2] = 0x90; // nop
> >> >>> +     insn_page[3] = 0xc3; // ret
> >> >>> +
> >> >>> +     // Place the instruction we want the hypervisor to see in the alternate page
> >> >>> +     for (i=0; i<alt_insn_length; i++)
> >> >>> +             alt_insn_page[i] = alt_insn[i];
> >> >>> +
> >> >>> +     // Save general registers
> >> >>> +     asm volatile(
> >> >>> +             "push %rax\n\r"
> >> >>> +             "push %rbx\n\r"
> >> >>> +             "push %rcx\n\r"
> >> >>> +             "push %rdx\n\r"
> >> >>> +             "push %rsi\n\r"
> >> >>> +             "push %rdi\n\r"
> >> >>> +             );
> >> >>
> >> >> This will not work if GCC is using rsp-relative addresses to access
> >> >> local variables.  You need to use mov instructions to load from inregs,
> >> >> and put the push/pop sequences inside the "main" asm that does the "call
> >> >> *%1".
> >> > Is there any way to let gcc use absolute address to access variables?
> >> > I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
> >> > and it seems that addressing for "save" is wrong.
> >> >
> >> > Arthur
> >> >>
> >> >> Paolo
> >> >>
> >> >>> +     // Load the code TLB with insn_page, but point the page tables at
> >> >>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >> >>> +     // This will make the CPU trap on the insn_page instruction but the
> >> >>> +     // hypervisor will see alt_insn_page.
> >> >>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
> >> >>> +     invlpg(insn_ram);
> >> >>> +     // Load code TLB
> >> >>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
> >> >>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> >> >>> +     // Trap, let hypervisor emulate at alt_insn_page
> >> >>> +     asm volatile(
> >> >>> +             "call *%1\n\r"
> >> >>> +
> >> >>> +             "mov %%rax, 0+%[outregs] \n\t"
> >> >>> +             "mov %%rbx, 8+%[outregs] \n\t"
> >> >>> +             "mov %%rcx, 16+%[outregs] \n\t"
> >> >>> +             "mov %%rdx, 24+%[outregs] \n\t"
> >> >>> +             "mov %%rsi, 32+%[outregs] \n\t"
> >> >>> +             "mov %%rdi, 40+%[outregs] \n\t"
> >> >>> +             "mov %%rsp,48+ %[outregs] \n\t"
> >> >>> +             "mov %%rbp, 56+%[outregs] \n\t"
> >> >>> +
> >> >>> +             /* Save RFLAGS in outregs*/
> >> >>> +             "pushf \n\t"
> >> >>> +             "popq 72+%[outregs] \n\t"
> >> >>> +             : [outregs]"+m"(outregs)
> >> >>> +             : "r"(insn_ram),
> >> >>> +                     "a"(mem), "b"(inregs.rbx),
> >> >>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
> >> >>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
> >> >>> +             : "memory", "cc"
> >> >>> +             );
> >> >>> +     // Restore general registers
> >> >>> +     asm volatile(
> >> >>> +             "pop %rax\n\r"
> >> >>> +             "pop %rbx\n\r"
> >> >>> +             "pop %rcx\n\r"
> >> >>> +             "pop %rdx\n\r"
> >> >>> +             "pop %rsi\n\r"
> >> >>> +             "pop %rdi\n\r"
> >> >>> +             );
> >> >>> +}
> >> >>> +
> >> >>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
> >> >>>  {
> >> >>>      ++exceptions;
> >> >>>
> >> >>
> >> >
> >> >
> >> >
> >> > --
> >> > Arthur Chunqi Li
> >> > Department of Computer Science
> >> > School of EECS
> >> > Peking University
> >> > Beijing, China
> >>
> >>
> >>
> >> --
> >> Arthur Chunqi Li
> >> Department of Computer Science
> >> School of EECS
> >> Peking University
> >> Beijing, China
> >
> > --
> >                         Gleb.
> 
> 
> 
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-18 15:47           ` Gleb Natapov
@ 2013-06-18 15:56             ` 李春奇 <Arthur Chunqi Li>
  2013-06-18 16:09               ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-18 15:56 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Paolo Bonzini, kvm

On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Tue, Jun 18, 2013 at 10:28:59PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> Hi Gleb,
>> >> I'm trying to solve these problems in the past days and meet many
>> >> difficulties. You want to save all the general registers in calling
>> >> insn_page, so registers should be saved to (save) in insn_page.
>> >> Because all the instructions should be generated outside and copy to
>> >> insn_page, and the instructions generated outside is RIP-relative, so
>> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
>> >>
>> > They do not have to be generated outside. You can write code into
>> > insn_page directly. Something like this outside of any functions:
>> >
>> > asm(".align 4096\n\t"
>> >     ".global insn_page\n\t"
>> >     ".global insn_page_end\n\t"
>> >     ".global test_insn\n\t"
>> >     ".global test_insn_end\n\t"
>> >     "insn_page:"
>> >     "mov %%rax, outregs \n\t"
>> >     ...
>> >     "test_insn:\n\t"
>> >     "in (%ds), %al\n\t"
>> >     ". = . + 31\n\t"
>> >     "test_insn_end:\n\t"
>> >     "mov outregs, %%rax\n\t"
>> >     ...
>> >     "ret\n\t"
>> >     ".align 4096\n\t"
>> >     "insn_page_end:\n\t");
>> >
>> > Now you copy that into alt_insn_page, put instruction you want to test
>> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
>> I used such codes:
>>
>> invlpg((void *)virt_to_phys(insn_page));
> virt_to_phys?
This is a mistake, I changed it to "invlpg(insn_page)" but the result
is the same.
>
>> asm volatile("call *%0" : : "r"(insn_page));
>> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> asm volatile("call *%0": : "r"(insn_page+1));
> +1?
Here I put "ret" on the first byte of insn_page, so the first call of
"insn_page" can just return, and the second call of "insn_page+1" will
directly call the second byte, which is the real content of insn_page.
>
>>
>> But it seems that alt_insn_page are not remapped to insn_page. Here
>> insn_page and alt_insn_page are all declared statically with
>> "asm(...)".
>>
>> Arthur
>> >
>> >> I have tried to move (save) into insn_page. But when calling
>> >> insn_page, data in it can only be read and any instructions like "xchg
>> >> %%rax, 0+%[save]" may cause error, because at this time read is from
>> >> TLB but write will cause inconsistent.
>> >>
>> >> Another way is disabling RIP-relative code, but I failed when using
>> >> "-mcmodel-large -fno-pic", the binary is also using RIP-relative mode.
>> >> Is there any way to totally disable RIP-relative code? Besides, using
>> >> this feature may specified to some newer C compiler. This may not be a
>> >> good solution.
>> >>
>> >> If we don't set %rsp and %rbp when executing emulator code, we can
>> >> just use "push/pop" to save other general registers.
>> >>
>> >> If you have any better solutions, please let me know.
>> >>
>> >> Thanks,
>> >> Arthur
>> >>
>> >> On Thu, Jun 13, 2013 at 12:50 PM, 李春奇 <Arthur Chunqi Li>
>> >> <yzt356@gmail.com> wrote:
>> >> > On Thu, Jun 13, 2013 at 4:50 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> >> >> Il 06/06/2013 11:24, Arthur Chunqi Li ha scritto:
>> >> >>> Add a function trap_emulator to run an instruction in emulator.
>> >> >>> Set inregs first (%rax is invalid because it is used as return
>> >> >>> address), put instruction codec in alt_insn and call func with
>> >> >>> alt_insn_length. Get results in outregs.
>> >> >>>
>> >> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> >> >>> ---
>> >> >>>  x86/emulator.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> >> >>>  1 file changed, 81 insertions(+)
>> >> >>>
>> >> >>> diff --git a/x86/emulator.c b/x86/emulator.c
>> >> >>> index 96576e5..8ab9904 100644
>> >> >>> --- a/x86/emulator.c
>> >> >>> +++ b/x86/emulator.c
>> >> >>> @@ -11,6 +11,14 @@ int fails, tests;
>> >> >>>
>> >> >>>  static int exceptions;
>> >> >>>
>> >> >>> +struct regs {
>> >> >>> +     u64 rax, rbx, rcx, rdx;
>> >> >>> +     u64 rsi, rdi, rsp, rbp;
>> >> >>> +     u64 rip, rflags;
>> >> >>> +};
>> >> >>> +
>> >> >>> +static struct regs inregs, outregs;
>> >> >>> +
>> >> >>>  void report(const char *name, int result)
>> >> >>>  {
>> >> >>>       ++tests;
>> >> >>> @@ -685,6 +693,79 @@ static void test_shld_shrd(u32 *mem)
>> >> >>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>> >> >>>  }
>> >> >>>
>> >> >>> +static void trap_emulator(uint64_t *mem, uint8_t *insn_page,
>> >> >>> +                          uint8_t *alt_insn_page, void *insn_ram,
>> >> >>> +                          uint8_t *alt_insn, int alt_insn_length)
>> >> >>> +{
>> >> >>> +     ulong *cr3 = (ulong *)read_cr3();
>> >> >>> +     int i;
>> >> >>> +
>> >> >>> +     // Pad with RET instructions
>> >> >>> +     memset(insn_page, 0xc3, 4096);
>> >> >>> +     memset(alt_insn_page, 0xc3, 4096);
>> >> >>> +
>> >> >>> +     // Place a trapping instruction in the page to trigger a VMEXIT
>> >> >>> +     insn_page[0] = 0x89; // mov %eax, (%rax)
>> >> >>> +     insn_page[1] = 0x00;
>> >> >>> +     insn_page[2] = 0x90; // nop
>> >> >>> +     insn_page[3] = 0xc3; // ret
>> >> >>> +
>> >> >>> +     // Place the instruction we want the hypervisor to see in the alternate page
>> >> >>> +     for (i=0; i<alt_insn_length; i++)
>> >> >>> +             alt_insn_page[i] = alt_insn[i];
>> >> >>> +
>> >> >>> +     // Save general registers
>> >> >>> +     asm volatile(
>> >> >>> +             "push %rax\n\r"
>> >> >>> +             "push %rbx\n\r"
>> >> >>> +             "push %rcx\n\r"
>> >> >>> +             "push %rdx\n\r"
>> >> >>> +             "push %rsi\n\r"
>> >> >>> +             "push %rdi\n\r"
>> >> >>> +             );
>> >> >>
>> >> >> This will not work if GCC is using rsp-relative addresses to access
>> >> >> local variables.  You need to use mov instructions to load from inregs,
>> >> >> and put the push/pop sequences inside the "main" asm that does the "call
>> >> >> *%1".
>> >> > Is there any way to let gcc use absolute address to access variables?
>> >> > I move variant "save" to the global and use "xchg %%rax, 0+%[save]"
>> >> > and it seems that addressing for "save" is wrong.
>> >> >
>> >> > Arthur
>> >> >>
>> >> >> Paolo
>> >> >>
>> >> >>> +     // Load the code TLB with insn_page, but point the page tables at
>> >> >>> +     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >> >>> +     // This will make the CPU trap on the insn_page instruction but the
>> >> >>> +     // hypervisor will see alt_insn_page.
>> >> >>> +     install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> >> >>> +     invlpg(insn_ram);
>> >> >>> +     // Load code TLB
>> >> >>> +     asm volatile("call *%0" : : "r"(insn_ram + 3));
>> >> >>> +     install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> >> >>> +     // Trap, let hypervisor emulate at alt_insn_page
>> >> >>> +     asm volatile(
>> >> >>> +             "call *%1\n\r"
>> >> >>> +
>> >> >>> +             "mov %%rax, 0+%[outregs] \n\t"
>> >> >>> +             "mov %%rbx, 8+%[outregs] \n\t"
>> >> >>> +             "mov %%rcx, 16+%[outregs] \n\t"
>> >> >>> +             "mov %%rdx, 24+%[outregs] \n\t"
>> >> >>> +             "mov %%rsi, 32+%[outregs] \n\t"
>> >> >>> +             "mov %%rdi, 40+%[outregs] \n\t"
>> >> >>> +             "mov %%rsp,48+ %[outregs] \n\t"
>> >> >>> +             "mov %%rbp, 56+%[outregs] \n\t"
>> >> >>> +
>> >> >>> +             /* Save RFLAGS in outregs*/
>> >> >>> +             "pushf \n\t"
>> >> >>> +             "popq 72+%[outregs] \n\t"
>> >> >>> +             : [outregs]"+m"(outregs)
>> >> >>> +             : "r"(insn_ram),
>> >> >>> +                     "a"(mem), "b"(inregs.rbx),
>> >> >>> +                     "c"(inregs.rcx), "d"(inregs.rdx),
>> >> >>> +                     "S"(inregs.rsi), "D"(inregs.rdi)
>> >> >>> +             : "memory", "cc"
>> >> >>> +             );
>> >> >>> +     // Restore general registers
>> >> >>> +     asm volatile(
>> >> >>> +             "pop %rax\n\r"
>> >> >>> +             "pop %rbx\n\r"
>> >> >>> +             "pop %rcx\n\r"
>> >> >>> +             "pop %rdx\n\r"
>> >> >>> +             "pop %rsi\n\r"
>> >> >>> +             "pop %rdi\n\r"
>> >> >>> +             );
>> >> >>> +}
>> >> >>> +
>> >> >>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>> >> >>>  {
>> >> >>>      ++exceptions;
>> >> >>>
>> >> >>
>> >> >
>> >> >
>> >> >
>> >> > --
>> >> > Arthur Chunqi Li
>> >> > Department of Computer Science
>> >> > School of EECS
>> >> > Peking University
>> >> > Beijing, China
>> >>
>> >>
>> >>
>> >> --
>> >> Arthur Chunqi Li
>> >> Department of Computer Science
>> >> School of EECS
>> >> Peking University
>> >> Beijing, China
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-18 15:56             ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-18 16:09               ` Gleb Natapov
  2013-06-18 16:14                 ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-18 16:09 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: Paolo Bonzini, kvm

On Tue, Jun 18, 2013 at 11:56:24PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> Hi Gleb,
> >> >> I'm trying to solve these problems in the past days and meet many
> >> >> difficulties. You want to save all the general registers in calling
> >> >> insn_page, so registers should be saved to (save) in insn_page.
> >> >> Because all the instructions should be generated outside and copy to
> >> >> insn_page, and the instructions generated outside is RIP-relative, so
> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
> >> >>
> >> > They do not have to be generated outside. You can write code into
> >> > insn_page directly. Something like this outside of any functions:
> >> >
> >> > asm(".align 4096\n\t"
> >> >     ".global insn_page\n\t"
> >> >     ".global insn_page_end\n\t"
> >> >     ".global test_insn\n\t"
> >> >     ".global test_insn_end\n\t"
> >> >     "insn_page:"
> >> >     "mov %%rax, outregs \n\t"
> >> >     ...
> >> >     "test_insn:\n\t"
> >> >     "in (%ds), %al\n\t"
> >> >     ". = . + 31\n\t"
> >> >     "test_insn_end:\n\t"
> >> >     "mov outregs, %%rax\n\t"
> >> >     ...
> >> >     "ret\n\t"
> >> >     ".align 4096\n\t"
> >> >     "insn_page_end:\n\t");
> >> >
> >> > Now you copy that into alt_insn_page, put instruction you want to test
> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
> >> I used such codes:
> >>
> >> invlpg((void *)virt_to_phys(insn_page));
> > virt_to_phys?
> This is a mistake, I changed it to "invlpg(insn_page)" but the result
> is the same.
> >
> >> asm volatile("call *%0" : : "r"(insn_page));
> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >> asm volatile("call *%0": : "r"(insn_page+1));
> > +1?
> Here I put "ret" on the first byte of insn_page, so the first call of
> "insn_page" can just return, and the second call of "insn_page+1“ will
> directly call the second byte, which is the real content of insn_page.
Send the code.

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-18 16:09               ` Gleb Natapov
@ 2013-06-18 16:14                 ` 李春奇 <Arthur Chunqi Li>
  2013-06-18 16:44                   ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-18 16:14 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Paolo Bonzini, kvm

extern u8 insn_page[], insn_page_end[];
extern u8 test_insn[], test_insn_end[];
extern u8 alt_insn_page[];

asm(
".align 4096\n\t"
".global insn_page\n\t"
".global insn_page_end\n\t"
".global test_insn\n\t"
".global test_insn_end\n\t"
"insn_page:\n\t"

"ret \n\t"

"push %rax; push %rbx\n\t"
"push %rcx; push %rdx\n\t"
"push %rsi; push %rdi\n\t"
"push %rbp\n\t"
"push %r8; push %r9\n\t"
"push %r10; push %r11\n\t"
"push %r12; push %r13\n\t"
"push %r14; push %r15\n\t"
"pushf\n\t"

"push 136+save \n\t"
"popf \n\t"
"mov 0+save, %rax \n\t"
"mov 8+save, %rbx \n\t"
"mov 16+save, %rcx \n\t"
"mov 24+save, %rdx \n\t"
"mov 32+save, %rsi \n\t"
"mov 40+save, %rdi \n\t"
"mov 56+save, %rbp \n\t"
"mov 64+save, %r8 \n\t"
"mov 72+save, %r9 \n\t"
"mov 80+save, %r10  \n\t"
"mov 88+save, %r11 \n\t"
"mov 96+save, %r12 \n\t"
"mov 104+save, %r13 \n\t"
"mov 112+save, %r14 \n\t"
"mov 120+save, %r15 \n\t"

"test_insn:\n\t"
"in  (%dx),%al\n\t"
". = . + 31\n\t"
"test_insn_end:\n\t"

"pushf \n\t"
"pop 136+save \n\t"
"mov %rax, 0+save \n\t"
"mov %rbx, 8+save \n\t"
"mov %rcx, 16+save \n\t"
"mov %rdx, 24+save \n\t"
"mov %rsi, 32+save \n\t"
"mov %rdi, 40+save \n\t"
"mov %rbp, 56+save \n\t"
"mov %r8, 64+save \n\t"
"mov %r9, 72+save \n\t"
"mov %r10, 80+save \n\t"
"mov %r11, 88+save \n\t"
"mov %r12, 96+save \n\t"
"mov %r13, 104+save \n\t"
"mov %r14, 112+save \n\t"
"mov %r15, 120+save \n\t"
"popf \n\t"
"pop %r15; pop %r14 \n\t"
"pop %r13; pop %r12 \n\t"
"pop %r11; pop %r10 \n\t"
"pop %r9; pop %r8 \n\t"
"pop %rbp \n\t"
"pop %rdi; pop %rsi \n\t"
"pop %rdx; pop %rcx \n\t"
"pop %rbx; pop %rax \n\t"

"ret\n\t"
"save:\n\t"
". = . + 256\n\t"
".align 4096\n\t"
"alt_insn_page:\n\t"
". = . + 4096\n\t"
);


static void mk_insn_page(uint8_t *alt_insn_page,
uint8_t *alt_insn, int alt_insn_length)
{
    int i, emul_offset;
    for (i=1; i<test_insn_end - test_insn; i++)
        test_insn[i] = 0x90; // nop
    emul_offset = test_insn - insn_page;
    for (i=0; i<alt_insn_length; i++)
        alt_insn_page[i+emul_offset] = alt_insn[i];
}

static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
{
    ulong *cr3 = (ulong *)read_cr3();
    int save_offset = (u8 *)(&save) - insn_page;

    memset(alt_insn_page, 0x90, 4096);
    save = inregs;
    mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
    // Load the code TLB with insn_page, but point the page tables at
    // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
    // This will make the CPU trap on the insn_page instruction but the
    // hypervisor will see alt_insn_page.
    //install_page(cr3, virt_to_phys(insn_page), insn_page);
    invlpg(insn_page);
    // Load code TLB
    asm volatile("call *%0" : : "r"(insn_page));
    install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
    // Trap, let hypervisor emulate at alt_insn_page
    asm volatile("call *%0": : "r"(insn_page+1));

    outregs = *((struct regs *)(&alt_insn_page[save_offset]));
}

static void test_movabs(uint64_t *mem)
{
    // mov $0xc3c3c3c3c3c3c3c3, %rcx
    uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
                                0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
    inregs = (struct regs){ 0 };
    trap_emulator(mem, alt_insn, 10);
    report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
}

On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
> On Tue, Jun 18, 2013 at 11:56:24PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> Hi Gleb,
>> >> >> I'm trying to solve these problems in the past days and meet many
>> >> >> difficulties. You want to save all the general registers in calling
>> >> >> insn_page, so registers should be saved to (save) in insn_page.
>> >> >> Because all the instructions should be generated outside and copy to
>> >> >> insn_page, and the instructions generated outside is RIP-relative, so
>> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
>> >> >>
>> >> > They do not have to be generated outside. You can write code into
>> >> > insn_page directly. Something like this outside of any functions:
>> >> >
>> >> > asm(".align 4096\n\t"
>> >> >     ".global insn_page\n\t"
>> >> >     ".global insn_page_end\n\t"
>> >> >     ".global test_insn\n\t"
>> >> >     ".global test_insn_end\n\t"
>> >> >     "insn_page:"
>> >> >     "mov %%rax, outregs \n\t"
>> >> >     ...
>> >> >     "test_insn:\n\t"
>> >> >     "in (%ds), %al\n\t"
>> >> >     ". = . + 31\n\t"
>> >> >     "test_insn_end:\n\t"
>> >> >     "mov outregs, %%rax\n\t"
>> >> >     ...
>> >> >     "ret\n\t"
>> >> >     ".align 4096\n\t"
>> >> >     "insn_page_end:\n\t");
>> >> >
>> >> > Now you copy that into alt_insn_page, put instruction you want to test
>> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
>> >> I used such codes:
>> >>
>> >> invlpg((void *)virt_to_phys(insn_page));
>> > virt_to_phys?
>> This is a mistake, I changed it to "invlpg(insn_page)" but the result
>> is the same.
>> >
>> >> asm volatile("call *%0" : : "r"(insn_page));
>> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >> asm volatile("call *%0": : "r"(insn_page+1));
>> > +1?
>> Here I put "ret" on the first byte of insn_page, so the first call of
>> "insn_page" can just return, and the second call of "insn_page+1“ will
>> directly call the second byte, which is the real content of insn_page.
> Send the code.
>
> --
>                         Gleb.



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-18 16:14                 ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-18 16:44                   ` Gleb Natapov
  2013-06-19  1:26                     ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-18 16:44 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: Paolo Bonzini, kvm

Send the code in the form of a patch.

On Wed, Jun 19, 2013 at 12:14:13AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> extern u8 insn_page[], insn_page_end[];
> extern u8 test_insn[], test_insn_end[];
> extern u8 alt_insn_page[];
> 
> asm(
> ".align 4096\n\t"
> ".global insn_page\n\t"
> ".global insn_page_end\n\t"
> ".global test_insn\n\t"
> ".global test_insn_end\n\t"
> "insn_page:\n\t"
> 
> "ret \n\t"
> 
> "push %rax; push %rbx\n\t"
> "push %rcx; push %rdx\n\t"
> "push %rsi; push %rdi\n\t"
> "push %rbp\n\t"
> "push %r8; push %r9\n\t"
> "push %r10; push %r11\n\t"
> "push %r12; push %r13\n\t"
> "push %r14; push %r15\n\t"
> "pushf\n\t"
> 
> "push 136+save \n\t"
> "popf \n\t"
> "mov 0+save, %rax \n\t"
> "mov 8+save, %rbx \n\t"
> "mov 16+save, %rcx \n\t"
> "mov 24+save, %rdx \n\t"
> "mov 32+save, %rsi \n\t"
> "mov 40+save, %rdi \n\t"
> "mov 56+save, %rbp \n\t"
> "mov 64+save, %r8 \n\t"
> "mov 72+save, %r9 \n\t"
> "mov 80+save, %r10  \n\t"
> "mov 88+save, %r11 \n\t"
> "mov 96+save, %r12 \n\t"
> "mov 104+save, %r13 \n\t"
> "mov 112+save, %r14 \n\t"
> "mov 120+save, %r15 \n\t"
> 
> "test_insn:\n\t"
> "in  (%dx),%al\n\t"
> ". = . + 31\n\t"
> "test_insn_end:\n\t"
> 
> "pushf \n\t"
> "pop 136+save \n\t"
> "mov %rax, 0+save \n\t"
> "mov %rbx, 8+save \n\t"
> "mov %rcx, 16+save \n\t"
> "mov %rdx, 24+save \n\t"
> "mov %rsi, 32+save \n\t"
> "mov %rdi, 40+save \n\t"
> "mov %rbp, 56+save \n\t"
> "mov %r8, 64+save \n\t"
> "mov %r9, 72+save \n\t"
> "mov %r10, 80+save \n\t"
> "mov %r11, 88+save \n\t"
> "mov %r12, 96+save \n\t"
> "mov %r13, 104+save \n\t"
> "mov %r14, 112+save \n\t"
> "mov %r15, 120+save \n\t"
> "popf \n\t"
> "pop %r15; pop %r14 \n\t"
> "pop %r13; pop %r12 \n\t"
> "pop %r11; pop %r10 \n\t"
> "pop %r9; pop %r8 \n\t"
> "pop %rbp \n\t"
> "pop %rdi; pop %rsi \n\t"
> "pop %rdx; pop %rcx \n\t"
> "pop %rbx; pop %rax \n\t"
> 
> "ret\n\t"
> "save:\n\t"
> ". = . + 256\n\t"
> ".align 4096\n\t"
> "alt_insn_page:\n\t"
> ". = . + 4096\n\t"
> );
> 
> 
> static void mk_insn_page(uint8_t *alt_insn_page,
> uint8_t *alt_insn, int alt_insn_length)
> {
>     int i, emul_offset;
>     for (i=1; i<test_insn_end - test_insn; i++)
>         test_insn[i] = 0x90; // nop
Why? Gcc should pad it with nops.

>     emul_offset = test_insn - insn_page;
>     for (i=0; i<alt_insn_length; i++)
>         alt_insn_page[i+emul_offset] = alt_insn[i];
> }
> 
> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> {
>     ulong *cr3 = (ulong *)read_cr3();
>     int save_offset = (u8 *)(&save) - insn_page;
> 
>     memset(alt_insn_page, 0x90, 4096);
alt_insn_page should contain the same instructions as insn_page except
between test_insn and test_insn_end. I do not know how you expect it to
work otherwise.

>     save = inregs;
>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
>     // Load the code TLB with insn_page, but point the page tables at
>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>     // This will make the CPU trap on the insn_page instruction but the
>     // hypervisor will see alt_insn_page.
>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
>     invlpg(insn_page);
>     // Load code TLB
>     asm volatile("call *%0" : : "r"(insn_page));
>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>     // Trap, let hypervisor emulate at alt_insn_page
>     asm volatile("call *%0": : "r"(insn_page+1));
> 
>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
> }
> 
> static void test_movabs(uint64_t *mem)
> {
>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
>     inregs = (struct regs){ 0 };
>     trap_emulator(mem, alt_insn, 10);
>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
> }
> 
> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> Hi Gleb,
> >> >> >> I'm trying to solve these problems in the past days and meet many
> >> >> >> difficulties. You want to save all the general registers in calling
> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
> >> >> >> Because all the instructions should be generated outside and copy to
> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
> >> >> >>
> >> >> > They do not have to be generated outside. You can write code into
> >> >> > insn_page directly. Something like this outside of any functions:
> >> >> >
> >> >> > asm(".align 4096\n\t"
> >> >> >     ".global insn_page\n\t"
> >> >> >     ".global insn_page_end\n\t"
> >> >> >     ".global test_insn\n\t"
> >> >> >     ".global test_insn_end\n\t"
> >> >> >     "insn_page:"
> >> >> >     "mov %%rax, outregs \n\t"
> >> >> >     ...
> >> >> >     "test_insn:\n\t"
> >> >> >     "in (%ds), %al\n\t"
> >> >> >     ". = . + 31\n\t"
> >> >> >     "test_insn_end:\n\t"
> >> >> >     "mov outregs, %%rax\n\t"
> >> >> >     ...
> >> >> >     "ret\n\t"
> >> >> >     ".align 4096\n\t"
> >> >> >     "insn_page_end:\n\t");
> >> >> >
> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
> >> >> I used such codes:
> >> >>
> >> >> invlpg((void *)virt_to_phys(insn_page));
> >> > virt_to_phys?
> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
> >> is the same.
> >> >
> >> >> asm volatile("call *%0" : : "r"(insn_page));
> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >> >> asm volatile("call *%0": : "r"(insn_page+1));
> >> > +1?
> >> Here I put "ret" on the first byte of insn_page, so the first call of
> >> "insn_page" can just return, and the second call of "insn_page+1“ will
> >> directly call the second byte, which is the real content of insn_page.
> > Send the code.
> >
> > --
> >                         Gleb.
> 
> 
> 
> -- 
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-18 16:44                   ` Gleb Natapov
@ 2013-06-19  1:26                     ` 李春奇 <Arthur Chunqi Li>
  2013-06-19  9:31                       ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-19  1:26 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Paolo Bonzini, kvm

On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
> Send code in a form of a patch.
>
> On Wed, Jun 19, 2013 at 12:14:13AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> extern u8 insn_page[], insn_page_end[];
>> extern u8 test_insn[], test_insn_end[];
>> extern u8 alt_insn_page[];
>>
>> asm(
>> ".align 4096\n\t"
>> ".global insn_page\n\t"
>> ".global insn_page_end\n\t"
>> ".global test_insn\n\t"
>> ".global test_insn_end\n\t"
>> "insn_page:\n\t"
>>
>> "ret \n\t"
>>
>> "push %rax; push %rbx\n\t"
>> "push %rcx; push %rdx\n\t"
>> "push %rsi; push %rdi\n\t"
>> "push %rbp\n\t"
>> "push %r8; push %r9\n\t"
>> "push %r10; push %r11\n\t"
>> "push %r12; push %r13\n\t"
>> "push %r14; push %r15\n\t"
>> "pushf\n\t"
>>
>> "push 136+save \n\t"
>> "popf \n\t"
>> "mov 0+save, %rax \n\t"
>> "mov 8+save, %rbx \n\t"
>> "mov 16+save, %rcx \n\t"
>> "mov 24+save, %rdx \n\t"
>> "mov 32+save, %rsi \n\t"
>> "mov 40+save, %rdi \n\t"
>> "mov 56+save, %rbp \n\t"
>> "mov 64+save, %r8 \n\t"
>> "mov 72+save, %r9 \n\t"
>> "mov 80+save, %r10  \n\t"
>> "mov 88+save, %r11 \n\t"
>> "mov 96+save, %r12 \n\t"
>> "mov 104+save, %r13 \n\t"
>> "mov 112+save, %r14 \n\t"
>> "mov 120+save, %r15 \n\t"
>>
>> "test_insn:\n\t"
>> "in  (%dx),%al\n\t"
>> ". = . + 31\n\t"
>> "test_insn_end:\n\t"
>>
>> "pushf \n\t"
>> "pop 136+save \n\t"
>> "mov %rax, 0+save \n\t"
>> "mov %rbx, 8+save \n\t"
>> "mov %rcx, 16+save \n\t"
>> "mov %rdx, 24+save \n\t"
>> "mov %rsi, 32+save \n\t"
>> "mov %rdi, 40+save \n\t"
>> "mov %rbp, 56+save \n\t"
>> "mov %r8, 64+save \n\t"
>> "mov %r9, 72+save \n\t"
>> "mov %r10, 80+save \n\t"
>> "mov %r11, 88+save \n\t"
>> "mov %r12, 96+save \n\t"
>> "mov %r13, 104+save \n\t"
>> "mov %r14, 112+save \n\t"
>> "mov %r15, 120+save \n\t"
>> "popf \n\t"
>> "pop %r15; pop %r14 \n\t"
>> "pop %r13; pop %r12 \n\t"
>> "pop %r11; pop %r10 \n\t"
>> "pop %r9; pop %r8 \n\t"
>> "pop %rbp \n\t"
>> "pop %rdi; pop %rsi \n\t"
>> "pop %rdx; pop %rcx \n\t"
>> "pop %rbx; pop %rax \n\t"
>>
>> "ret\n\t"
>> "save:\n\t"
>> ". = . + 256\n\t"
>> ".align 4096\n\t"
>> "alt_insn_page:\n\t"
>> ". = . + 4096\n\t"
>> );
>>
>>
>> static void mk_insn_page(uint8_t *alt_insn_page,
>> uint8_t *alt_insn, int alt_insn_length)
>> {
>>     int i, emul_offset;
>>     for (i=1; i<test_insn_end - test_insn; i++)
>>         test_insn[i] = 0x90; // nop
> Why? Gcc should pad it with nops.
>
>>     emul_offset = test_insn - insn_page;
>>     for (i=0; i<alt_insn_length; i++)
>>         alt_insn_page[i+emul_offset] = alt_insn[i];
>> }
>>
>> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> {
>>     ulong *cr3 = (ulong *)read_cr3();
>>     int save_offset = (u8 *)(&save) - insn_page;
>>
>>     memset(alt_insn_page, 0x90, 4096);
> alt_insn_page should contains the same instruction as insn_page except
> between test_insn and test_insn_end. I do not know how you expect it to
> work otherwise.
In my opinion, only the code between test_insn and test_insn_end in
alt_insn_page needs to be set. insn_page will be executed in the guest,
and when it traps into the emulator, the OS will load alt_insn_page
(because of invlpg(insn_page)) and then return to the guest, which
continues executing insn_page (from the TLB). I don't know if this is
right, but I used this trick in my previous patch and it ran well. I used
"trace-cmd record -e kvm" to trace it and found that the instructions in
alt_insn_page are not executed, so I suppose that alt_insn_page is not
loaded at the right place.

Arthur
>
>>     save = inregs;
>>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
>>     // Load the code TLB with insn_page, but point the page tables at
>>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>>     // This will make the CPU trap on the insn_page instruction but the
>>     // hypervisor will see alt_insn_page.
>>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
>>     invlpg(insn_page);
>>     // Load code TLB
>>     asm volatile("call *%0" : : "r"(insn_page));
>>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>>     // Trap, let hypervisor emulate at alt_insn_page
>>     asm volatile("call *%0": : "r"(insn_page+1));
>>
>>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
>> }
>>
>> static void test_movabs(uint64_t *mem)
>> {
>>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
>>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
>>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
>>     inregs = (struct regs){ 0 };
>>     trap_emulator(mem, alt_insn, 10);
>>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
>> }
>>
>> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> Hi Gleb,
>> >> >> >> I'm trying to solve these problems in the past days and meet many
>> >> >> >> difficulties. You want to save all the general registers in calling
>> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
>> >> >> >> Because all the instructions should be generated outside and copy to
>> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
>> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
>> >> >> >>
>> >> >> > They do not have to be generated outside. You can write code into
>> >> >> > insn_page directly. Something like this outside of any functions:
>> >> >> >
>> >> >> > asm(".align 4096\n\t"
>> >> >> >     ".global insn_page\n\t"
>> >> >> >     ".global insn_page_end\n\t"
>> >> >> >     ".global test_insn\n\t"
>> >> >> >     ".global test_insn_end\n\t"
>> >> >> >     "insn_page:"
>> >> >> >     "mov %%rax, outregs \n\t"
>> >> >> >     ...
>> >> >> >     "test_insn:\n\t"
>> >> >> >     "in (%ds), %al\n\t"
>> >> >> >     ". = . + 31\n\t"
>> >> >> >     "test_insn_end:\n\t"
>> >> >> >     "mov outregs, %%rax\n\t"
>> >> >> >     ...
>> >> >> >     "ret\n\t"
>> >> >> >     ".align 4096\n\t"
>> >> >> >     "insn_page_end:\n\t");
>> >> >> >
>> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
>> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
>> >> >> I used such codes:
>> >> >>
>> >> >> invlpg((void *)virt_to_phys(insn_page));
>> >> > virt_to_phys?
>> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
>> >> is the same.
>> >> >
>> >> >> asm volatile("call *%0" : : "r"(insn_page));
>> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >> >> asm volatile("call *%0": : "r"(insn_page+1));
>> >> > +1?
>> >> Here I put "ret" on the first byte of insn_page, so the first call of
>> >> "insn_page" can just return, and the second call of "insn_page+1“ will
>> >> directly call the second byte, which is the real content of insn_page.
>> > Send the code.
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19  1:26                     ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-19  9:31                       ` Gleb Natapov
  2013-06-19 12:18                         ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-19  9:31 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: Paolo Bonzini, kvm

On Wed, Jun 19, 2013 at 09:26:59AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
> > Send code in a form of a patch.
> >
> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> extern u8 insn_page[], insn_page_end[];
> >> extern u8 test_insn[], test_insn_end[];
> >> extern u8 alt_insn_page[];
> >>
> >> asm(
> >> ".align 4096\n\t"
> >> ".global insn_page\n\t"
> >> ".global insn_page_end\n\t"
> >> ".global test_insn\n\t"
> >> ".global test_insn_end\n\t"
> >> "insn_page:\n\t"
> >>
> >> "ret \n\t"
> >>
> >> "push %rax; push %rbx\n\t"
> >> "push %rcx; push %rdx\n\t"
> >> "push %rsi; push %rdi\n\t"
> >> "push %rbp\n\t"
> >> "push %r8; push %r9\n\t"
> >> "push %r10; push %r11\n\t"
> >> "push %r12; push %r13\n\t"
> >> "push %r14; push %r15\n\t"
> >> "pushf\n\t"
> >>
> >> "push 136+save \n\t"
> >> "popf \n\t"
> >> "mov 0+save, %rax \n\t"
> >> "mov 8+save, %rbx \n\t"
> >> "mov 16+save, %rcx \n\t"
> >> "mov 24+save, %rdx \n\t"
> >> "mov 32+save, %rsi \n\t"
> >> "mov 40+save, %rdi \n\t"
> >> "mov 56+save, %rbp \n\t"
> >> "mov 64+save, %r8 \n\t"
> >> "mov 72+save, %r9 \n\t"
> >> "mov 80+save, %r10  \n\t"
> >> "mov 88+save, %r11 \n\t"
> >> "mov 96+save, %r12 \n\t"
> >> "mov 104+save, %r13 \n\t"
> >> "mov 112+save, %r14 \n\t"
> >> "mov 120+save, %r15 \n\t"
> >>
> >> "test_insn:\n\t"
> >> "in  (%dx),%al\n\t"
> >> ". = . + 31\n\t"
> >> "test_insn_end:\n\t"
> >>
> >> "pushf \n\t"
> >> "pop 136+save \n\t"
> >> "mov %rax, 0+save \n\t"
> >> "mov %rbx, 8+save \n\t"
> >> "mov %rcx, 16+save \n\t"
> >> "mov %rdx, 24+save \n\t"
> >> "mov %rsi, 32+save \n\t"
> >> "mov %rdi, 40+save \n\t"
> >> "mov %rbp, 56+save \n\t"
> >> "mov %r8, 64+save \n\t"
> >> "mov %r9, 72+save \n\t"
> >> "mov %r10, 80+save \n\t"
> >> "mov %r11, 88+save \n\t"
> >> "mov %r12, 96+save \n\t"
> >> "mov %r13, 104+save \n\t"
> >> "mov %r14, 112+save \n\t"
> >> "mov %r15, 120+save \n\t"
> >> "popf \n\t"
> >> "pop %r15; pop %r14 \n\t"
> >> "pop %r13; pop %r12 \n\t"
> >> "pop %r11; pop %r10 \n\t"
> >> "pop %r9; pop %r8 \n\t"
> >> "pop %rbp \n\t"
> >> "pop %rdi; pop %rsi \n\t"
> >> "pop %rdx; pop %rcx \n\t"
> >> "pop %rbx; pop %rax \n\t"
> >>
> >> "ret\n\t"
> >> "save:\n\t"
> >> ". = . + 256\n\t"
> >> ".align 4096\n\t"
> >> "alt_insn_page:\n\t"
> >> ". = . + 4096\n\t"
> >> );
> >>
> >>
> >> static void mk_insn_page(uint8_t *alt_insn_page,
> >> uint8_t *alt_insn, int alt_insn_length)
> >> {
> >>     int i, emul_offset;
> >>     for (i=1; i<test_insn_end - test_insn; i++)
> >>         test_insn[i] = 0x90; // nop
> > Why? Gcc should pad it with nops.
> >
> >>     emul_offset = test_insn - insn_page;
> >>     for (i=0; i<alt_insn_length; i++)
> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
> >> }
> >>
> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> >> {
> >>     ulong *cr3 = (ulong *)read_cr3();
> >>     int save_offset = (u8 *)(&save) - insn_page;
> >>
> >>     memset(alt_insn_page, 0x90, 4096);
> > alt_insn_page should contains the same instruction as insn_page except
> > between test_insn and test_insn_end. I do not know how you expect it to
> > work otherwise.
> In my oponion, only codes between test_insn and test_insn_end in
> alt_insn_page need to be set, insn_page will be executed in the guest,
> and when trapping into emulator OS will load alt_insn_page (because of
> invlpg(insn_page)), then return to guest with executing insn_page
> (from TLB).
Before the trap the code will likely be executed from insn_page, but
after the trap it is very optimistic to assume that the TLB will still
contain this virtual address, since the host will execute quite a lot of
code and can even schedule in the middle; the TLB will then no longer
contain the address and your test will crash. Even the code before the
test instruction can be executed from alt_insn_page if the guest is
scheduled out after invlpg() and before it has executed every instruction
up to the trapping one. In your case the test will crash too, instead of
yielding a false positive.

> I don't know if this is right, but I use this trick in my
> previous patch and it runs well.
Your previous patches always had c3 (ret) after tested instruction on
alt_insn_page.

>                                  I use "trace-cmd record -e kvm" to
> trace it and found instructions in alt_insn_page are not executed, so
> I suppose that alt_insn_page is not loaded to the right place.
Do you see the "in" instruction emulated? Anyway, the current code is
incorrect since the current install_page() implementation cannot handle
large pages and the code is backed by large pages. You can fix
install_page() to check for that and break the large page into small
ones before installing a page.

> 
> Arthur
> >
> >>     save = inregs;
> >>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
> >>     // Load the code TLB with insn_page, but point the page tables at
> >>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >>     // This will make the CPU trap on the insn_page instruction but the
> >>     // hypervisor will see alt_insn_page.
> >>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
> >>     invlpg(insn_page);
> >>     // Load code TLB
> >>     asm volatile("call *%0" : : "r"(insn_page));
> >>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >>     // Trap, let hypervisor emulate at alt_insn_page
> >>     asm volatile("call *%0": : "r"(insn_page+1));
> >>
> >>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
> >> }
> >>
> >> static void test_movabs(uint64_t *mem)
> >> {
> >>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
> >>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
> >>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
> >>     inregs = (struct regs){ 0 };
> >>     trap_emulator(mem, alt_insn, 10);
> >>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
> >> }
> >>
> >> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> >> Hi Gleb,
> >> >> >> >> I'm trying to solve these problems in the past days and meet many
> >> >> >> >> difficulties. You want to save all the general registers in calling
> >> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
> >> >> >> >> Because all the instructions should be generated outside and copy to
> >> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
> >> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
> >> >> >> >>
> >> >> >> > They do not have to be generated outside. You can write code into
> >> >> >> > insn_page directly. Something like this outside of any functions:
> >> >> >> >
> >> >> >> > asm(".align 4096\n\t"
> >> >> >> >     ".global insn_page\n\t"
> >> >> >> >     ".global insn_page_end\n\t"
> >> >> >> >     ".global test_insn\n\t"
> >> >> >> >     ".global test_insn_end\n\t"
> >> >> >> >     "insn_page:"
> >> >> >> >     "mov %%rax, outregs \n\t"
> >> >> >> >     ...
> >> >> >> >     "test_insn:\n\t"
> >> >> >> >     "in (%ds), %al\n\t"
> >> >> >> >     ". = . + 31\n\t"
> >> >> >> >     "test_insn_end:\n\t"
> >> >> >> >     "mov outregs, %%rax\n\t"
> >> >> >> >     ...
> >> >> >> >     "ret\n\t"
> >> >> >> >     ".align 4096\n\t"
> >> >> >> >     "insn_page_end:\n\t");
> >> >> >> >
> >> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
> >> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
> >> >> >> I used such codes:
> >> >> >>
> >> >> >> invlpg((void *)virt_to_phys(insn_page));
> >> >> > virt_to_phys?
> >> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
> >> >> is the same.
> >> >> >
> >> >> >> asm volatile("call *%0" : : "r"(insn_page));
> >> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >> >> >> asm volatile("call *%0": : "r"(insn_page+1));
> >> >> > +1?
> >> >> Here I put "ret" on the first byte of insn_page, so the first call of
> >> >> "insn_page" can just return, and the second call of "insn_page+1“ will
> >> >> directly call the second byte, which is the real content of insn_page.
> >> > Send the code.
> >> >
> >> > --
> >> >                         Gleb.
> >>
> >>
> >>
> >> --
> >> Arthur Chunqi Li
> >> Department of Computer Science
> >> School of EECS
> >> Peking University
> >> Beijing, China
> >
> > --
> >                         Gleb.
> 
> 
> 
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19  9:31                       ` Gleb Natapov
@ 2013-06-19 12:18                         ` 李春奇 <Arthur Chunqi Li>
  2013-06-19 12:26                           ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-19 12:18 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Paolo Bonzini, kvm

On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Wed, Jun 19, 2013 at 09:26:59AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> > Send code in a form of a patch.
>> >
>> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> extern u8 insn_page[], insn_page_end[];
>> >> extern u8 test_insn[], test_insn_end[];
>> >> extern u8 alt_insn_page[];
>> >>
>> >> asm(
>> >> ".align 4096\n\t"
>> >> ".global insn_page\n\t"
>> >> ".global insn_page_end\n\t"
>> >> ".global test_insn\n\t"
>> >> ".global test_insn_end\n\t"
>> >> "insn_page:\n\t"
>> >>
>> >> "ret \n\t"
>> >>
>> >> "push %rax; push %rbx\n\t"
>> >> "push %rcx; push %rdx\n\t"
>> >> "push %rsi; push %rdi\n\t"
>> >> "push %rbp\n\t"
>> >> "push %r8; push %r9\n\t"
>> >> "push %r10; push %r11\n\t"
>> >> "push %r12; push %r13\n\t"
>> >> "push %r14; push %r15\n\t"
>> >> "pushf\n\t"
>> >>
>> >> "push 136+save \n\t"
>> >> "popf \n\t"
>> >> "mov 0+save, %rax \n\t"
>> >> "mov 8+save, %rbx \n\t"
>> >> "mov 16+save, %rcx \n\t"
>> >> "mov 24+save, %rdx \n\t"
>> >> "mov 32+save, %rsi \n\t"
>> >> "mov 40+save, %rdi \n\t"
>> >> "mov 56+save, %rbp \n\t"
>> >> "mov 64+save, %r8 \n\t"
>> >> "mov 72+save, %r9 \n\t"
>> >> "mov 80+save, %r10  \n\t"
>> >> "mov 88+save, %r11 \n\t"
>> >> "mov 96+save, %r12 \n\t"
>> >> "mov 104+save, %r13 \n\t"
>> >> "mov 112+save, %r14 \n\t"
>> >> "mov 120+save, %r15 \n\t"
>> >>
>> >> "test_insn:\n\t"
>> >> "in  (%dx),%al\n\t"
>> >> ". = . + 31\n\t"
>> >> "test_insn_end:\n\t"
>> >>
>> >> "pushf \n\t"
>> >> "pop 136+save \n\t"
>> >> "mov %rax, 0+save \n\t"
>> >> "mov %rbx, 8+save \n\t"
>> >> "mov %rcx, 16+save \n\t"
>> >> "mov %rdx, 24+save \n\t"
>> >> "mov %rsi, 32+save \n\t"
>> >> "mov %rdi, 40+save \n\t"
>> >> "mov %rbp, 56+save \n\t"
>> >> "mov %r8, 64+save \n\t"
>> >> "mov %r9, 72+save \n\t"
>> >> "mov %r10, 80+save \n\t"
>> >> "mov %r11, 88+save \n\t"
>> >> "mov %r12, 96+save \n\t"
>> >> "mov %r13, 104+save \n\t"
>> >> "mov %r14, 112+save \n\t"
>> >> "mov %r15, 120+save \n\t"
>> >> "popf \n\t"
>> >> "pop %r15; pop %r14 \n\t"
>> >> "pop %r13; pop %r12 \n\t"
>> >> "pop %r11; pop %r10 \n\t"
>> >> "pop %r9; pop %r8 \n\t"
>> >> "pop %rbp \n\t"
>> >> "pop %rdi; pop %rsi \n\t"
>> >> "pop %rdx; pop %rcx \n\t"
>> >> "pop %rbx; pop %rax \n\t"
>> >>
>> >> "ret\n\t"
>> >> "save:\n\t"
>> >> ". = . + 256\n\t"
>> >> ".align 4096\n\t"
>> >> "alt_insn_page:\n\t"
>> >> ". = . + 4096\n\t"
>> >> );
>> >>
>> >>
>> >> static void mk_insn_page(uint8_t *alt_insn_page,
>> >> uint8_t *alt_insn, int alt_insn_length)
>> >> {
>> >>     int i, emul_offset;
>> >>     for (i=1; i<test_insn_end - test_insn; i++)
>> >>         test_insn[i] = 0x90; // nop
>> > Why? Gcc should pad it with nops.
>> >
>> >>     emul_offset = test_insn - insn_page;
>> >>     for (i=0; i<alt_insn_length; i++)
>> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
>> >> }
>> >>
>> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> >> {
>> >>     ulong *cr3 = (ulong *)read_cr3();
>> >>     int save_offset = (u8 *)(&save) - insn_page;
>> >>
>> >>     memset(alt_insn_page, 0x90, 4096);
>> > alt_insn_page should contains the same instruction as insn_page except
>> > between test_insn and test_insn_end. I do not know how you expect it to
>> > work otherwise.
>> In my oponion, only codes between test_insn and test_insn_end in
>> alt_insn_page need to be set, insn_page will be executed in the guest,
>> and when trapping into emulator OS will load alt_insn_page (because of
>> invlpg(insn_page)), then return to guest with executing insn_page
>> (from TLB).
> While before trap the code will likely be executed from insn_page,
> but after the trap it is very optimistic to assume that tlb cache
> will still contain this virtual address since host will execute quite a
> lot of code and can even schedule in the middle, so the TLB will not
> contain the address and your test will crash. Even the code before test
> instruction can be executed from alt_insn_page if guest is scheduled out
> after invlpg() and before it executes every instruction until trapping
> one. In your case the test will crash too instead of yielding false positive.
>
>> I don't know if this is right, but I use this trick in my
>> previous patch and it runs well.
> Your previous patches always had c3 (ret) after tested instruction on
> alt_insn_page.
>
>>                                  I use "trace-cmd record -e kvm" to
>> trace it and found instructions in alt_insn_page are not executed, so
>> I suppose that alt_insn_page is not loaded to the right place.
> Do you see "in" instruction emulated? Anyway current code is incorrect
> since current install_page() implementation cannot handle large pages
> and the code is backed up by large pages. You can fix install_page() to
> check for that and break large page into small one before installing a
> page.
Here I have two questions.
1. There's another function called "install_large_page"; can it be
used in our case? I found that this function is not used at all.
2. Why does the current version run well? Are dynamically allocated
pages automatically aligned to 2MB (large page size)?

Arthur
>
>>
>> Arthur
>> >
>> >>     save = inregs;
>> >>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
>> >>     // Load the code TLB with insn_page, but point the page tables at
>> >>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >>     // This will make the CPU trap on the insn_page instruction but the
>> >>     // hypervisor will see alt_insn_page.
>> >>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
>> >>     invlpg(insn_page);
>> >>     // Load code TLB
>> >>     asm volatile("call *%0" : : "r"(insn_page));
>> >>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >>     // Trap, let hypervisor emulate at alt_insn_page
>> >>     asm volatile("call *%0": : "r"(insn_page+1));
>> >>
>> >>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
>> >> }
>> >>
>> >> static void test_movabs(uint64_t *mem)
>> >> {
>> >>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
>> >>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
>> >>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
>> >>     inregs = (struct regs){ 0 };
>> >>     trap_emulator(mem, alt_insn, 10);
>> >>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
>> >> }
>> >>
>> >> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> >> Hi Gleb,
>> >> >> >> >> I'm trying to solve these problems in the past days and meet many
>> >> >> >> >> difficulties. You want to save all the general registers in calling
>> >> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
>> >> >> >> >> Because all the instructions should be generated outside and copy to
>> >> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
>> >> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
>> >> >> >> >>
>> >> >> >> > They do not have to be generated outside. You can write code into
>> >> >> >> > insn_page directly. Something like this outside of any functions:
>> >> >> >> >
>> >> >> >> > asm(".align 4096\n\t"
>> >> >> >> >     ".global insn_page\n\t"
>> >> >> >> >     ".global insn_page_end\n\t"
>> >> >> >> >     ".global test_insn\n\t"
>> >> >> >> >     ".global test_insn_end\n\t"
>> >> >> >> >     "insn_page:"
>> >> >> >> >     "mov %%rax, outregs \n\t"
>> >> >> >> >     ...
>> >> >> >> >     "test_insn:\n\t"
>> >> >> >> >     "in (%ds), %al\n\t"
>> >> >> >> >     ". = . + 31\n\t"
>> >> >> >> >     "test_insn_end:\n\t"
>> >> >> >> >     "mov outregs, %%rax\n\t"
>> >> >> >> >     ...
>> >> >> >> >     "ret\n\t"
>> >> >> >> >     ".align 4096\n\t"
>> >> >> >> >     "insn_page_end:\n\t");
>> >> >> >> >
>> >> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
>> >> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
>> >> >> >> I used such codes:
>> >> >> >>
>> >> >> >> invlpg((void *)virt_to_phys(insn_page));
>> >> >> > virt_to_phys?
>> >> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
>> >> >> is the same.
>> >> >> >
>> >> >> >> asm volatile("call *%0" : : "r"(insn_page));
>> >> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >> >> >> asm volatile("call *%0": : "r"(insn_page+1));
>> >> >> > +1?
>> >> >> Here I put "ret" on the first byte of insn_page, so the first call of
>> >> >> "insn_page" can just return, and the second call of "insn_page+1“ will
>> >> >> directly call the second byte, which is the real content of insn_page.
>> >> > Send the code.
>> >> >
>> >> > --
>> >> >                         Gleb.
>> >>
>> >>
>> >>
>> >> --
>> >> Arthur Chunqi Li
>> >> Department of Computer Science
>> >> School of EECS
>> >> Peking University
>> >> Beijing, China
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 12:18                         ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-19 12:26                           ` Gleb Natapov
  2013-06-19 12:30                             ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-19 12:26 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: Paolo Bonzini, kvm

On Wed, Jun 19, 2013 at 08:18:29PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > Send code in a form of a patch.
> >> >
> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> extern u8 insn_page[], insn_page_end[];
> >> >> extern u8 test_insn[], test_insn_end[];
> >> >> extern u8 alt_insn_page[];
> >> >>
> >> >> asm(
> >> >> ".align 4096\n\t"
> >> >> ".global insn_page\n\t"
> >> >> ".global insn_page_end\n\t"
> >> >> ".global test_insn\n\t"
> >> >> ".global test_insn_end\n\t"
> >> >> "insn_page:\n\t"
> >> >>
> >> >> "ret \n\t"
> >> >>
> >> >> "push %rax; push %rbx\n\t"
> >> >> "push %rcx; push %rdx\n\t"
> >> >> "push %rsi; push %rdi\n\t"
> >> >> "push %rbp\n\t"
> >> >> "push %r8; push %r9\n\t"
> >> >> "push %r10; push %r11\n\t"
> >> >> "push %r12; push %r13\n\t"
> >> >> "push %r14; push %r15\n\t"
> >> >> "pushf\n\t"
> >> >>
> >> >> "push 136+save \n\t"
> >> >> "popf \n\t"
> >> >> "mov 0+save, %rax \n\t"
> >> >> "mov 8+save, %rbx \n\t"
> >> >> "mov 16+save, %rcx \n\t"
> >> >> "mov 24+save, %rdx \n\t"
> >> >> "mov 32+save, %rsi \n\t"
> >> >> "mov 40+save, %rdi \n\t"
> >> >> "mov 56+save, %rbp \n\t"
> >> >> "mov 64+save, %r8 \n\t"
> >> >> "mov 72+save, %r9 \n\t"
> >> >> "mov 80+save, %r10  \n\t"
> >> >> "mov 88+save, %r11 \n\t"
> >> >> "mov 96+save, %r12 \n\t"
> >> >> "mov 104+save, %r13 \n\t"
> >> >> "mov 112+save, %r14 \n\t"
> >> >> "mov 120+save, %r15 \n\t"
> >> >>
> >> >> "test_insn:\n\t"
> >> >> "in  (%dx),%al\n\t"
> >> >> ". = . + 31\n\t"
> >> >> "test_insn_end:\n\t"
> >> >>
> >> >> "pushf \n\t"
> >> >> "pop 136+save \n\t"
> >> >> "mov %rax, 0+save \n\t"
> >> >> "mov %rbx, 8+save \n\t"
> >> >> "mov %rcx, 16+save \n\t"
> >> >> "mov %rdx, 24+save \n\t"
> >> >> "mov %rsi, 32+save \n\t"
> >> >> "mov %rdi, 40+save \n\t"
> >> >> "mov %rbp, 56+save \n\t"
> >> >> "mov %r8, 64+save \n\t"
> >> >> "mov %r9, 72+save \n\t"
> >> >> "mov %r10, 80+save \n\t"
> >> >> "mov %r11, 88+save \n\t"
> >> >> "mov %r12, 96+save \n\t"
> >> >> "mov %r13, 104+save \n\t"
> >> >> "mov %r14, 112+save \n\t"
> >> >> "mov %r15, 120+save \n\t"
> >> >> "popf \n\t"
> >> >> "pop %r15; pop %r14 \n\t"
> >> >> "pop %r13; pop %r12 \n\t"
> >> >> "pop %r11; pop %r10 \n\t"
> >> >> "pop %r9; pop %r8 \n\t"
> >> >> "pop %rbp \n\t"
> >> >> "pop %rdi; pop %rsi \n\t"
> >> >> "pop %rdx; pop %rcx \n\t"
> >> >> "pop %rbx; pop %rax \n\t"
> >> >>
> >> >> "ret\n\t"
> >> >> "save:\n\t"
> >> >> ". = . + 256\n\t"
> >> >> ".align 4096\n\t"
> >> >> "alt_insn_page:\n\t"
> >> >> ". = . + 4096\n\t"
> >> >> );
> >> >>
> >> >>
> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
> >> >> uint8_t *alt_insn, int alt_insn_length)
> >> >> {
> >> >>     int i, emul_offset;
> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
> >> >>         test_insn[i] = 0x90; // nop
> >> > Why? Gcc should pad it with nops.
> >> >
> >> >>     emul_offset = test_insn - insn_page;
> >> >>     for (i=0; i<alt_insn_length; i++)
> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
> >> >> }
> >> >>
> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> >> >> {
> >> >>     ulong *cr3 = (ulong *)read_cr3();
> >> >>     int save_offset = (u8 *)(&save) - insn_page;
> >> >>
> >> >>     memset(alt_insn_page, 0x90, 4096);
> >> > alt_insn_page should contains the same instruction as insn_page except
> >> > between test_insn and test_insn_end. I do not know how you expect it to
> >> > work otherwise.
> >> In my oponion, only codes between test_insn and test_insn_end in
> >> alt_insn_page need to be set, insn_page will be executed in the guest,
> >> and when trapping into emulator OS will load alt_insn_page (because of
> >> invlpg(insn_page)), then return to guest with executing insn_page
> >> (from TLB).
> > While before trap the code will likely be executed from insn_page,
> > but after the trap it is very optimistic to assume that tlb cache
> > will still contain this virtual address since host will execute quite a
> > lot of code and can even schedule in the middle, so the TLB will not
> > contain the address and your test will crash. Even the code before test
> > instruction can be executed from alt_insn_page if guest is scheduled out
> > after invlpg() and before it executes every instruction until trapping
> > one. In your case the test will crash too instead of yielding false positive.
> >
> >> I don't know if this is right, but I use this trick in my
> >> previous patch and it runs well.
> > Your previous patches always had c3 (ret) after tested instruction on
> > alt_insn_page.
> >
> >>                                  I use "trace-cmd record -e kvm" to
> >> trace it and found instructions in alt_insn_page are not executed, so
> >> I suppose that alt_insn_page is not loaded to the right place.
> > Do you see "in" instruction emulated? Anyway current code is incorrect
> > since current install_page() implementation cannot handle large pages
> > and the code is backed up by large pages. You can fix install_page() to
> > check for that and break large page into small one before installing a
> > page.
> Here I have two questions.
> 1. There's another function called "install_large_page", can it be
> used to our occasion? I found that this function is not used at all.
It is used when initial page tables are created.
See lib/x86/vm.c:setup_mmu_range()

> 2. Why will current version runs well? Do pages allocated dynamically
> are automatically aligned to 2MB (large page size)?
> 
No, they are 4K pages.

> Arthur
> >
> >>
> >> Arthur
> >> >
> >> >>     save = inregs;
> >> >>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
> >> >>     // Load the code TLB with insn_page, but point the page tables at
> >> >>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >> >>     // This will make the CPU trap on the insn_page instruction but the
> >> >>     // hypervisor will see alt_insn_page.
> >> >>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
> >> >>     invlpg(insn_page);
> >> >>     // Load code TLB
> >> >>     asm volatile("call *%0" : : "r"(insn_page));
> >> >>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >> >>     // Trap, let hypervisor emulate at alt_insn_page
> >> >>     asm volatile("call *%0": : "r"(insn_page+1));
> >> >>
> >> >>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
> >> >> }
> >> >>
> >> >> static void test_movabs(uint64_t *mem)
> >> >> {
> >> >>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
> >> >>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
> >> >>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
> >> >>     inregs = (struct regs){ 0 };
> >> >>     trap_emulator(mem, alt_insn, 10);
> >> >>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
> >> >> }
> >> >>
> >> >> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> >> >> Hi Gleb,
> >> >> >> >> >> I'm trying to solve these problems in the past days and meet many
> >> >> >> >> >> difficulties. You want to save all the general registers in calling
> >> >> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
> >> >> >> >> >> Because all the instructions should be generated outside and copy to
> >> >> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
> >> >> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
> >> >> >> >> >>
> >> >> >> >> > They do not have to be generated outside. You can write code into
> >> >> >> >> > insn_page directly. Something like this outside of any functions:
> >> >> >> >> >
> >> >> >> >> > asm(".align 4096\n\t"
> >> >> >> >> >     ".global insn_page\n\t"
> >> >> >> >> >     ".global insn_page_end\n\t"
> >> >> >> >> >     ".global test_insn\n\t"
> >> >> >> >> >     ".global test_insn_end\n\t"
> >> >> >> >> >     "insn_page:"
> >> >> >> >> >     "mov %%rax, outregs \n\t"
> >> >> >> >> >     ...
> >> >> >> >> >     "test_insn:\n\t"
> >> >> >> >> >     "in (%ds), %al\n\t"
> >> >> >> >> >     ". = . + 31\n\t"
> >> >> >> >> >     "test_insn_end:\n\t"
> >> >> >> >> >     "mov outregs, %%rax\n\t"
> >> >> >> >> >     ...
> >> >> >> >> >     "ret\n\t"
> >> >> >> >> >     ".align 4096\n\t"
> >> >> >> >> >     "insn_page_end:\n\t");
> >> >> >> >> >
> >> >> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
> >> >> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
> >> >> >> >> I used such codes:
> >> >> >> >>
> >> >> >> >> invlpg((void *)virt_to_phys(insn_page));
> >> >> >> > virt_to_phys?
> >> >> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
> >> >> >> is the same.
> >> >> >> >
> >> >> >> >> asm volatile("call *%0" : : "r"(insn_page));
> >> >> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
> >> >> >> >> asm volatile("call *%0": : "r"(insn_page+1));
> >> >> >> > +1?
> >> >> >> Here I put "ret" on the first byte of insn_page, so the first call of
> >> >> >> "insn_page" can just return, and the second call of "insn_page+1“ will
> >> >> >> directly call the second byte, which is the real content of insn_page.
> >> >> > Send the code.
> >> >> >
> >> >> > --
> >> >> >                         Gleb.
> >> >>
> >> >>
> >> >>
> >> >> --
> >> >> Arthur Chunqi Li
> >> >> Department of Computer Science
> >> >> School of EECS
> >> >> Peking University
> >> >> Beijing, China
> >> >
> >> > --
> >> >                         Gleb.
> >>
> >>
> >>
> >> --
> >> Arthur Chunqi Li
> >> Department of Computer Science
> >> School of EECS
> >> Peking University
> >> Beijing, China
> >
> > --
> >                         Gleb.
> 
> 
> 
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 12:26                           ` Gleb Natapov
@ 2013-06-19 12:30                             ` 李春奇 <Arthur Chunqi Li>
  2013-06-19 12:32                               ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-19 12:30 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Paolo Bonzini, kvm

On Wed, Jun 19, 2013 at 8:26 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Wed, Jun 19, 2013 at 08:18:29PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > Send code in a form of a patch.
>> >> >
>> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> extern u8 insn_page[], insn_page_end[];
>> >> >> extern u8 test_insn[], test_insn_end[];
>> >> >> extern u8 alt_insn_page[];
>> >> >>
>> >> >> asm(
>> >> >> ".align 4096\n\t"
>> >> >> ".global insn_page\n\t"
>> >> >> ".global insn_page_end\n\t"
>> >> >> ".global test_insn\n\t"
>> >> >> ".global test_insn_end\n\t"
>> >> >> "insn_page:\n\t"
>> >> >>
>> >> >> "ret \n\t"
>> >> >>
>> >> >> "push %rax; push %rbx\n\t"
>> >> >> "push %rcx; push %rdx\n\t"
>> >> >> "push %rsi; push %rdi\n\t"
>> >> >> "push %rbp\n\t"
>> >> >> "push %r8; push %r9\n\t"
>> >> >> "push %r10; push %r11\n\t"
>> >> >> "push %r12; push %r13\n\t"
>> >> >> "push %r14; push %r15\n\t"
>> >> >> "pushf\n\t"
>> >> >>
>> >> >> "push 136+save \n\t"
>> >> >> "popf \n\t"
>> >> >> "mov 0+save, %rax \n\t"
>> >> >> "mov 8+save, %rbx \n\t"
>> >> >> "mov 16+save, %rcx \n\t"
>> >> >> "mov 24+save, %rdx \n\t"
>> >> >> "mov 32+save, %rsi \n\t"
>> >> >> "mov 40+save, %rdi \n\t"
>> >> >> "mov 56+save, %rbp \n\t"
>> >> >> "mov 64+save, %r8 \n\t"
>> >> >> "mov 72+save, %r9 \n\t"
>> >> >> "mov 80+save, %r10  \n\t"
>> >> >> "mov 88+save, %r11 \n\t"
>> >> >> "mov 96+save, %r12 \n\t"
>> >> >> "mov 104+save, %r13 \n\t"
>> >> >> "mov 112+save, %r14 \n\t"
>> >> >> "mov 120+save, %r15 \n\t"
>> >> >>
>> >> >> "test_insn:\n\t"
>> >> >> "in  (%dx),%al\n\t"
>> >> >> ". = . + 31\n\t"
>> >> >> "test_insn_end:\n\t"
>> >> >>
>> >> >> "pushf \n\t"
>> >> >> "pop 136+save \n\t"
>> >> >> "mov %rax, 0+save \n\t"
>> >> >> "mov %rbx, 8+save \n\t"
>> >> >> "mov %rcx, 16+save \n\t"
>> >> >> "mov %rdx, 24+save \n\t"
>> >> >> "mov %rsi, 32+save \n\t"
>> >> >> "mov %rdi, 40+save \n\t"
>> >> >> "mov %rbp, 56+save \n\t"
>> >> >> "mov %r8, 64+save \n\t"
>> >> >> "mov %r9, 72+save \n\t"
>> >> >> "mov %r10, 80+save \n\t"
>> >> >> "mov %r11, 88+save \n\t"
>> >> >> "mov %r12, 96+save \n\t"
>> >> >> "mov %r13, 104+save \n\t"
>> >> >> "mov %r14, 112+save \n\t"
>> >> >> "mov %r15, 120+save \n\t"
>> >> >> "popf \n\t"
>> >> >> "pop %r15; pop %r14 \n\t"
>> >> >> "pop %r13; pop %r12 \n\t"
>> >> >> "pop %r11; pop %r10 \n\t"
>> >> >> "pop %r9; pop %r8 \n\t"
>> >> >> "pop %rbp \n\t"
>> >> >> "pop %rdi; pop %rsi \n\t"
>> >> >> "pop %rdx; pop %rcx \n\t"
>> >> >> "pop %rbx; pop %rax \n\t"
>> >> >>
>> >> >> "ret\n\t"
>> >> >> "save:\n\t"
>> >> >> ". = . + 256\n\t"
>> >> >> ".align 4096\n\t"
>> >> >> "alt_insn_page:\n\t"
>> >> >> ". = . + 4096\n\t"
>> >> >> );
>> >> >>
>> >> >>
>> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
>> >> >> uint8_t *alt_insn, int alt_insn_length)
>> >> >> {
>> >> >>     int i, emul_offset;
>> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
>> >> >>         test_insn[i] = 0x90; // nop
>> >> > Why? Gcc should pad it with nops.
>> >> >
>> >> >>     emul_offset = test_insn - insn_page;
>> >> >>     for (i=0; i<alt_insn_length; i++)
>> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
>> >> >> }
>> >> >>
>> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> >> >> {
>> >> >>     ulong *cr3 = (ulong *)read_cr3();
>> >> >>     int save_offset = (u8 *)(&save) - insn_page;
>> >> >>
>> >> >>     memset(alt_insn_page, 0x90, 4096);
>> >> > alt_insn_page should contains the same instruction as insn_page except
>> >> > between test_insn and test_insn_end. I do not know how you expect it to
>> >> > work otherwise.
>> >> In my opinion, only the code between test_insn and test_insn_end in
>> >> alt_insn_page need to be set, insn_page will be executed in the guest,
>> >> and when trapping into emulator OS will load alt_insn_page (because of
>> >> invlpg(insn_page)), then return to guest with executing insn_page
>> >> (from TLB).
>> > While before trap the code will likely be executed from insn_page,
>> > but after the trap it is very optimistic to assume that tlb cache
>> > will still contain this virtual address since host will execute quite a
>> > lot of code and can even schedule in the middle, so the TLB will not
>> > contain the address and your test will crash. Even the code before test
>> > instruction can be executed from alt_insn_page if guest is scheduled out
>> > after invlpg() and before it executes every instruction until trapping
>> > one. In your case the test will crash too instead of yielding false positive.
>> >
>> >> I don't know if this is right, but I use this trick in my
>> >> previous patch and it runs well.
>> > Your previous patches always had c3 (ret) after tested instruction on
>> > alt_insn_page.
>> >
>> >>                                  I use "trace-cmd record -e kvm" to
>> >> trace it and found instructions in alt_insn_page are not executed, so
>> >> I suppose that alt_insn_page is not loaded to the right place.
>> > Do you see "in" instruction emulated? Anyway current code is incorrect
>> > since current install_page() implementation cannot handle large pages
>> > and the code is backed up by large pages. You can fix install_page() to
>> > check for that and break large page into small one before installing a
>> > page.
>> Here I have two questions.
>> 1. There's another function called "install_large_page", can it be
>> used to our occasion? I found that this function is not used at all.
> It is used when initial page tables are created.
> See lib/x86/vm.c:setup_mmu_range()
>
>> 2. Why will current version runs well? Do pages allocated dynamically
>> are automatically aligned to 2MB (large page size)?
>>
> No, they are 4K pages.
So is that why dynamically creating insn_page and alt_insn_page with
alloc_page() gets the right result?
>
>> Arthur
>> >
>> >>
>> >> Arthur
>> >> >
>> >> >>     save = inregs;
>> >> >>     mk_insn_page(alt_insn_page, alt_insn, alt_insn_length);
>> >> >>     // Load the code TLB with insn_page, but point the page tables at
>> >> >>     // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> >> >>     // This will make the CPU trap on the insn_page instruction but the
>> >> >>     // hypervisor will see alt_insn_page.
>> >> >>     //install_page(cr3, virt_to_phys(insn_page), insn_page);
>> >> >>     invlpg(insn_page);
>> >> >>     // Load code TLB
>> >> >>     asm volatile("call *%0" : : "r"(insn_page));
>> >> >>     install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >> >>     // Trap, let hypervisor emulate at alt_insn_page
>> >> >>     asm volatile("call *%0": : "r"(insn_page+1));
>> >> >>
>> >> >>     outregs = *((struct regs *)(&alt_insn_page[save_offset]));
>> >> >> }
>> >> >>
>> >> >> static void test_movabs(uint64_t *mem)
>> >> >> {
>> >> >>     // mov $0xc3c3c3c3c3c3c3c3, %rcx
>> >> >>     uint8_t alt_insn[] = {0x48, 0xb9, 0xc3, 0xc3, 0xc3,
>> >> >>                                 0xc3, 0xc3, 0xc3, 0xc3, 0xc3};
>> >> >>     inregs = (struct regs){ 0 };
>> >> >>     trap_emulator(mem, alt_insn, 10);
>> >> >>     report("64-bit mov imm2", outregs.rcx == 0xc3c3c3c3c3c3c3c3);
>> >> >> }
>> >> >>
>> >> >> On Wed, Jun 19, 2013 at 12:09 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > On Tue, Jun 18, 2013 at 11:56:24PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> On Tue, Jun 18, 2013 at 11:47 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> >> > On Tue, Jun 18, 2013 at 10:28:59PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> >> On Tue, Jun 18, 2013 at 8:45 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> >> >> > On Thu, Jun 13, 2013 at 05:30:03PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> >> >> Hi Gleb,
>> >> >> >> >> >> I'm trying to solve these problems in the past days and meet many
>> >> >> >> >> >> difficulties. You want to save all the general registers in calling
>> >> >> >> >> >> insn_page, so registers should be saved to (save) in insn_page.
>> >> >> >> >> >> Because all the instructions should be generated outside and copy to
>> >> >> >> >> >> insn_page, and the instructions generated outside is RIP-relative, so
>> >> >> >> >> >> inside insn_page (save) will be wrong pointed with RIP-relative code.
>> >> >> >> >> >>
>> >> >> >> >> > They do not have to be generated outside. You can write code into
>> >> >> >> >> > insn_page directly. Something like this outside of any functions:
>> >> >> >> >> >
>> >> >> >> >> > asm(".align 4096\n\t"
>> >> >> >> >> >     ".global insn_page\n\t"
>> >> >> >> >> >     ".global insn_page_end\n\t"
>> >> >> >> >> >     ".global test_insn\n\t"
>> >> >> >> >> >     ".global test_insn_end\n\t"
>> >> >> >> >> >     "insn_page:"
>> >> >> >> >> >     "mov %%rax, outregs \n\t"
>> >> >> >> >> >     ...
>> >> >> >> >> >     "test_insn:\n\t"
>> >> >> >> >> >     "in (%ds), %al\n\t"
>> >> >> >> >> >     ". = . + 31\n\t"
>> >> >> >> >> >     "test_insn_end:\n\t"
>> >> >> >> >> >     "mov outregs, %%rax\n\t"
>> >> >> >> >> >     ...
>> >> >> >> >> >     "ret\n\t"
>> >> >> >> >> >     ".align 4096\n\t"
>> >> >> >> >> >     "insn_page_end:\n\t");
>> >> >> >> >> >
>> >> >> >> >> > Now you copy that into alt_insn_page, put instruction you want to test
>> >> >> >> >> > into test_insn offset and remap alt_insn_page into "insn_page" virtual address.
>> >> >> >> >> I used such codes:
>> >> >> >> >>
>> >> >> >> >> invlpg((void *)virt_to_phys(insn_page));
>> >> >> >> > virt_to_phys?
>> >> >> >> This is a mistake, I changed it to "invlpg(insn_page)" but the result
>> >> >> >> is the same.
>> >> >> >> >
>> >> >> >> >> asm volatile("call *%0" : : "r"(insn_page));
>> >> >> >> >> install_page(cr3, virt_to_phys(alt_insn_page), insn_page);
>> >> >> >> >> asm volatile("call *%0": : "r"(insn_page+1));
>> >> >> >> > +1?
>> >> >> >> Here I put "ret" on the first byte of insn_page, so the first call of
>> >> >> >> "insn_page" can just return, and the second call of "insn_page+1" will
>> >> >> >> directly call the second byte, which is the real content of insn_page.
>> >> >> > Send the code.
>> >> >> >
>> >> >> > --
>> >> >> >                         Gleb.
>> >> >>
>> >> >>
>> >> >>
>> >> >> --
>> >> >> Arthur Chunqi Li
>> >> >> Department of Computer Science
>> >> >> School of EECS
>> >> >> Peking University
>> >> >> Beijing, China
>> >> >
>> >> > --
>> >> >                         Gleb.
>> >>
>> >>
>> >>
>> >> --
>> >> Arthur Chunqi Li
>> >> Department of Computer Science
>> >> School of EECS
>> >> Peking University
>> >> Beijing, China
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 12:30                             ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-19 12:32                               ` Gleb Natapov
  2013-06-19 14:01                                 ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-19 12:32 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: Paolo Bonzini, kvm

On Wed, Jun 19, 2013 at 08:30:33PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> On Wed, Jun 19, 2013 at 8:26 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Wed, Jun 19, 2013 at 08:18:29PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > Send code in a form of a patch.
> >> >> >
> >> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> extern u8 insn_page[], insn_page_end[];
> >> >> >> extern u8 test_insn[], test_insn_end[];
> >> >> >> extern u8 alt_insn_page[];
> >> >> >>
> >> >> >> asm(
> >> >> >> ".align 4096\n\t"
> >> >> >> ".global insn_page\n\t"
> >> >> >> ".global insn_page_end\n\t"
> >> >> >> ".global test_insn\n\t"
> >> >> >> ".global test_insn_end\n\t"
> >> >> >> "insn_page:\n\t"
> >> >> >>
> >> >> >> "ret \n\t"
> >> >> >>
> >> >> >> "push %rax; push %rbx\n\t"
> >> >> >> "push %rcx; push %rdx\n\t"
> >> >> >> "push %rsi; push %rdi\n\t"
> >> >> >> "push %rbp\n\t"
> >> >> >> "push %r8; push %r9\n\t"
> >> >> >> "push %r10; push %r11\n\t"
> >> >> >> "push %r12; push %r13\n\t"
> >> >> >> "push %r14; push %r15\n\t"
> >> >> >> "pushf\n\t"
> >> >> >>
> >> >> >> "push 136+save \n\t"
> >> >> >> "popf \n\t"
> >> >> >> "mov 0+save, %rax \n\t"
> >> >> >> "mov 8+save, %rbx \n\t"
> >> >> >> "mov 16+save, %rcx \n\t"
> >> >> >> "mov 24+save, %rdx \n\t"
> >> >> >> "mov 32+save, %rsi \n\t"
> >> >> >> "mov 40+save, %rdi \n\t"
> >> >> >> "mov 56+save, %rbp \n\t"
> >> >> >> "mov 64+save, %r8 \n\t"
> >> >> >> "mov 72+save, %r9 \n\t"
> >> >> >> "mov 80+save, %r10  \n\t"
> >> >> >> "mov 88+save, %r11 \n\t"
> >> >> >> "mov 96+save, %r12 \n\t"
> >> >> >> "mov 104+save, %r13 \n\t"
> >> >> >> "mov 112+save, %r14 \n\t"
> >> >> >> "mov 120+save, %r15 \n\t"
> >> >> >>
> >> >> >> "test_insn:\n\t"
> >> >> >> "in  (%dx),%al\n\t"
> >> >> >> ". = . + 31\n\t"
> >> >> >> "test_insn_end:\n\t"
> >> >> >>
> >> >> >> "pushf \n\t"
> >> >> >> "pop 136+save \n\t"
> >> >> >> "mov %rax, 0+save \n\t"
> >> >> >> "mov %rbx, 8+save \n\t"
> >> >> >> "mov %rcx, 16+save \n\t"
> >> >> >> "mov %rdx, 24+save \n\t"
> >> >> >> "mov %rsi, 32+save \n\t"
> >> >> >> "mov %rdi, 40+save \n\t"
> >> >> >> "mov %rbp, 56+save \n\t"
> >> >> >> "mov %r8, 64+save \n\t"
> >> >> >> "mov %r9, 72+save \n\t"
> >> >> >> "mov %r10, 80+save \n\t"
> >> >> >> "mov %r11, 88+save \n\t"
> >> >> >> "mov %r12, 96+save \n\t"
> >> >> >> "mov %r13, 104+save \n\t"
> >> >> >> "mov %r14, 112+save \n\t"
> >> >> >> "mov %r15, 120+save \n\t"
> >> >> >> "popf \n\t"
> >> >> >> "pop %r15; pop %r14 \n\t"
> >> >> >> "pop %r13; pop %r12 \n\t"
> >> >> >> "pop %r11; pop %r10 \n\t"
> >> >> >> "pop %r9; pop %r8 \n\t"
> >> >> >> "pop %rbp \n\t"
> >> >> >> "pop %rdi; pop %rsi \n\t"
> >> >> >> "pop %rdx; pop %rcx \n\t"
> >> >> >> "pop %rbx; pop %rax \n\t"
> >> >> >>
> >> >> >> "ret\n\t"
> >> >> >> "save:\n\t"
> >> >> >> ". = . + 256\n\t"
> >> >> >> ".align 4096\n\t"
> >> >> >> "alt_insn_page:\n\t"
> >> >> >> ". = . + 4096\n\t"
> >> >> >> );
> >> >> >>
> >> >> >>
> >> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
> >> >> >> uint8_t *alt_insn, int alt_insn_length)
> >> >> >> {
> >> >> >>     int i, emul_offset;
> >> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
> >> >> >>         test_insn[i] = 0x90; // nop
> >> >> > Why? Gcc should pad it with nops.
> >> >> >
> >> >> >>     emul_offset = test_insn - insn_page;
> >> >> >>     for (i=0; i<alt_insn_length; i++)
> >> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
> >> >> >> }
> >> >> >>
> >> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> >> >> >> {
> >> >> >>     ulong *cr3 = (ulong *)read_cr3();
> >> >> >>     int save_offset = (u8 *)(&save) - insn_page;
> >> >> >>
> >> >> >>     memset(alt_insn_page, 0x90, 4096);
> >> >> > alt_insn_page should contains the same instruction as insn_page except
> >> >> > between test_insn and test_insn_end. I do not know how you expect it to
> >> >> > work otherwise.
> >> >> In my opinion, only the code between test_insn and test_insn_end in
> >> >> alt_insn_page need to be set, insn_page will be executed in the guest,
> >> >> and when trapping into emulator OS will load alt_insn_page (because of
> >> >> invlpg(insn_page)), then return to guest with executing insn_page
> >> >> (from TLB).
> >> > While before trap the code will likely be executed from insn_page,
> >> > but after the trap it is very optimistic to assume that tlb cache
> >> > will still contain this virtual address since host will execute quite a
> >> > lot of code and can even schedule in the middle, so the TLB will not
> >> > contain the address and your test will crash. Even the code before test
> >> > instruction can be executed from alt_insn_page if guest is scheduled out
> >> > after invlpg() and before it executes every instruction until trapping
> >> > one. In your case the test will crash too instead of yielding false positive.
> >> >
> >> >> I don't know if this is right, but I use this trick in my
> >> >> previous patch and it runs well.
> >> > Your previous patches always had c3 (ret) after tested instruction on
> >> > alt_insn_page.
> >> >
> >> >>                                  I use "trace-cmd record -e kvm" to
> >> >> trace it and found instructions in alt_insn_page are not executed, so
> >> >> I suppose that alt_insn_page is not loaded to the right place.
> >> > Do you see "in" instruction emulated? Anyway current code is incorrect
> >> > since current install_page() implementation cannot handle large pages
> >> > and the code is backed up by large pages. You can fix install_page() to
> >> > check for that and break large page into small one before installing a
> >> > page.
> >> Here I have two questions.
> >> 1. There's another function called "install_large_page", can it be
> >> used to our occasion? I found that this function is not used at all.
> > It is used when initial page tables are created.
> > See lib/x86/vm.c:setup_mmu_range()
> >
> >> 2. Why will current version runs well? Do pages allocated dynamically
> >> are automatically aligned to 2MB (large page size)?
> >>
> > No, they are 4K pages.
> Thus why dynamically creating insn_page and alt_insn_page with
> alloc_page() can get the right result?
Probably.

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 12:32                               ` Gleb Natapov
@ 2013-06-19 14:01                                 ` 李春奇 <Arthur Chunqi Li>
  2013-06-19 14:13                                   ` Gleb Natapov
  0 siblings, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-19 14:01 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Paolo Bonzini, kvm

I found the root cause! insn_ram, as originally set up, is also used by
test_rip_relative(), which causes a conflict. I changed it and
everything runs well.

On Wed, Jun 19, 2013 at 8:32 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Wed, Jun 19, 2013 at 08:30:33PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> On Wed, Jun 19, 2013 at 8:26 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Wed, Jun 19, 2013 at 08:18:29PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > Send code in a form of a patch.
>> >> >> >
>> >> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> extern u8 insn_page[], insn_page_end[];
>> >> >> >> extern u8 test_insn[], test_insn_end[];
>> >> >> >> extern u8 alt_insn_page[];
>> >> >> >>
>> >> >> >> asm(
>> >> >> >> ".align 4096\n\t"
>> >> >> >> ".global insn_page\n\t"
>> >> >> >> ".global insn_page_end\n\t"
>> >> >> >> ".global test_insn\n\t"
>> >> >> >> ".global test_insn_end\n\t"
>> >> >> >> "insn_page:\n\t"
>> >> >> >>
>> >> >> >> "ret \n\t"
>> >> >> >>
>> >> >> >> "push %rax; push %rbx\n\t"
>> >> >> >> "push %rcx; push %rdx\n\t"
>> >> >> >> "push %rsi; push %rdi\n\t"
>> >> >> >> "push %rbp\n\t"
>> >> >> >> "push %r8; push %r9\n\t"
>> >> >> >> "push %r10; push %r11\n\t"
>> >> >> >> "push %r12; push %r13\n\t"
>> >> >> >> "push %r14; push %r15\n\t"
>> >> >> >> "pushf\n\t"
>> >> >> >>
>> >> >> >> "push 136+save \n\t"
>> >> >> >> "popf \n\t"
>> >> >> >> "mov 0+save, %rax \n\t"
>> >> >> >> "mov 8+save, %rbx \n\t"
>> >> >> >> "mov 16+save, %rcx \n\t"
>> >> >> >> "mov 24+save, %rdx \n\t"
>> >> >> >> "mov 32+save, %rsi \n\t"
>> >> >> >> "mov 40+save, %rdi \n\t"
>> >> >> >> "mov 56+save, %rbp \n\t"
>> >> >> >> "mov 64+save, %r8 \n\t"
>> >> >> >> "mov 72+save, %r9 \n\t"
>> >> >> >> "mov 80+save, %r10  \n\t"
>> >> >> >> "mov 88+save, %r11 \n\t"
>> >> >> >> "mov 96+save, %r12 \n\t"
>> >> >> >> "mov 104+save, %r13 \n\t"
>> >> >> >> "mov 112+save, %r14 \n\t"
>> >> >> >> "mov 120+save, %r15 \n\t"
>> >> >> >>
>> >> >> >> "test_insn:\n\t"
>> >> >> >> "in  (%dx),%al\n\t"
>> >> >> >> ". = . + 31\n\t"
>> >> >> >> "test_insn_end:\n\t"
>> >> >> >>
>> >> >> >> "pushf \n\t"
>> >> >> >> "pop 136+save \n\t"
>> >> >> >> "mov %rax, 0+save \n\t"
>> >> >> >> "mov %rbx, 8+save \n\t"
>> >> >> >> "mov %rcx, 16+save \n\t"
>> >> >> >> "mov %rdx, 24+save \n\t"
>> >> >> >> "mov %rsi, 32+save \n\t"
>> >> >> >> "mov %rdi, 40+save \n\t"
>> >> >> >> "mov %rbp, 56+save \n\t"
>> >> >> >> "mov %r8, 64+save \n\t"
>> >> >> >> "mov %r9, 72+save \n\t"
>> >> >> >> "mov %r10, 80+save \n\t"
>> >> >> >> "mov %r11, 88+save \n\t"
>> >> >> >> "mov %r12, 96+save \n\t"
>> >> >> >> "mov %r13, 104+save \n\t"
>> >> >> >> "mov %r14, 112+save \n\t"
>> >> >> >> "mov %r15, 120+save \n\t"
>> >> >> >> "popf \n\t"
>> >> >> >> "pop %r15; pop %r14 \n\t"
>> >> >> >> "pop %r13; pop %r12 \n\t"
>> >> >> >> "pop %r11; pop %r10 \n\t"
>> >> >> >> "pop %r9; pop %r8 \n\t"
>> >> >> >> "pop %rbp \n\t"
>> >> >> >> "pop %rdi; pop %rsi \n\t"
>> >> >> >> "pop %rdx; pop %rcx \n\t"
>> >> >> >> "pop %rbx; pop %rax \n\t"
>> >> >> >>
>> >> >> >> "ret\n\t"
>> >> >> >> "save:\n\t"
>> >> >> >> ". = . + 256\n\t"
>> >> >> >> ".align 4096\n\t"
>> >> >> >> "alt_insn_page:\n\t"
>> >> >> >> ". = . + 4096\n\t"
>> >> >> >> );
>> >> >> >>
>> >> >> >>
>> >> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
>> >> >> >> uint8_t *alt_insn, int alt_insn_length)
>> >> >> >> {
>> >> >> >>     int i, emul_offset;
>> >> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
>> >> >> >>         test_insn[i] = 0x90; // nop
>> >> >> > Why? Gcc should pad it with nops.
>> >> >> >
>> >> >> >>     emul_offset = test_insn - insn_page;
>> >> >> >>     for (i=0; i<alt_insn_length; i++)
>> >> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
>> >> >> >> }
>> >> >> >>
>> >> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> >> >> >> {
>> >> >> >>     ulong *cr3 = (ulong *)read_cr3();
>> >> >> >>     int save_offset = (u8 *)(&save) - insn_page;
>> >> >> >>
>> >> >> >>     memset(alt_insn_page, 0x90, 4096);
>> >> >> > alt_insn_page should contains the same instruction as insn_page except
>> >> >> > between test_insn and test_insn_end. I do not know how you expect it to
>> >> >> > work otherwise.
>> >> >> In my opinion, only the code between test_insn and test_insn_end in
>> >> >> alt_insn_page need to be set, insn_page will be executed in the guest,
>> >> >> and when trapping into emulator OS will load alt_insn_page (because of
>> >> >> invlpg(insn_page)), then return to guest with executing insn_page
>> >> >> (from TLB).
>> >> > While before trap the code will likely be executed from insn_page,
>> >> > but after the trap it is very optimistic to assume that tlb cache
>> >> > will still contain this virtual address since host will execute quite a
>> >> > lot of code and can even schedule in the middle, so the TLB will not
>> >> > contain the address and your test will crash. Even the code before test
>> >> > instruction can be executed from alt_insn_page if guest is scheduled out
>> >> > after invlpg() and before it executes every instruction until trapping
>> >> > one. In your case the test will crash too instead of yielding false positive.
>> >> >
>> >> >> I don't know if this is right, but I use this trick in my
>> >> >> previous patch and it runs well.
>> >> > Your previous patches always had c3 (ret) after tested instruction on
>> >> > alt_insn_page.
>> >> >
>> >> >>                                  I use "trace-cmd record -e kvm" to
>> >> >> trace it and found instructions in alt_insn_page are not executed, so
>> >> >> I suppose that alt_insn_page is not loaded to the right place.
>> >> > Do you see "in" instruction emulated? Anyway current code is incorrect
>> >> > since current install_page() implementation cannot handle large pages
>> >> > and the code is backed up by large pages. You can fix install_page() to
>> >> > check for that and break large page into small one before installing a
>> >> > page.
>> >> Here I have two questions.
>> >> 1. There's another function called "install_large_page", can it be
>> >> used to our occasion? I found that this function is not used at all.
>> > It is used when initial page tables are created.
>> > See lib/x86/vm.c:setup_mmu_range()
>> >
>> >> 2. Why will current version runs well? Do pages allocated dynamically
>> >> are automatically aligned to 2MB (large page size)?
>> >>
>> > No, they are 4K pages.
>> Thus why dynamically creating insn_page and alt_insn_page with
>> alloc_page() can get the right result?
> Probably.
>
> --
>                         Gleb.



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 14:01                                 ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-19 14:13                                   ` Gleb Natapov
  2013-06-19 14:20                                     ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-19 14:13 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>; +Cc: Paolo Bonzini, kvm

On Wed, Jun 19, 2013 at 10:01:40PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> I found the final reason! The initial use of init_ram is also used by
> test_rip_relative(), which will cause conflict. I changed it and
> everything runs well.
> 
Not sure what you mean. Your version of test_movabs does not use insn_ram.

> On Wed, Jun 19, 2013 at 8:32 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Wed, Jun 19, 2013 at 08:30:33PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> On Wed, Jun 19, 2013 at 8:26 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> > On Wed, Jun 19, 2013 at 08:18:29PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
> >> >> >> > Send code in a form of a patch.
> >> >> >> >
> >> >> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> >> >> >> extern u8 insn_page[], insn_page_end[];
> >> >> >> >> extern u8 test_insn[], test_insn_end[];
> >> >> >> >> extern u8 alt_insn_page[];
> >> >> >> >>
> >> >> >> >> asm(
> >> >> >> >> ".align 4096\n\t"
> >> >> >> >> ".global insn_page\n\t"
> >> >> >> >> ".global insn_page_end\n\t"
> >> >> >> >> ".global test_insn\n\t"
> >> >> >> >> ".global test_insn_end\n\t"
> >> >> >> >> "insn_page:\n\t"
> >> >> >> >>
> >> >> >> >> "ret \n\t"
> >> >> >> >>
> >> >> >> >> "push %rax; push %rbx\n\t"
> >> >> >> >> "push %rcx; push %rdx\n\t"
> >> >> >> >> "push %rsi; push %rdi\n\t"
> >> >> >> >> "push %rbp\n\t"
> >> >> >> >> "push %r8; push %r9\n\t"
> >> >> >> >> "push %r10; push %r11\n\t"
> >> >> >> >> "push %r12; push %r13\n\t"
> >> >> >> >> "push %r14; push %r15\n\t"
> >> >> >> >> "pushf\n\t"
> >> >> >> >>
> >> >> >> >> "push 136+save \n\t"
> >> >> >> >> "popf \n\t"
> >> >> >> >> "mov 0+save, %rax \n\t"
> >> >> >> >> "mov 8+save, %rbx \n\t"
> >> >> >> >> "mov 16+save, %rcx \n\t"
> >> >> >> >> "mov 24+save, %rdx \n\t"
> >> >> >> >> "mov 32+save, %rsi \n\t"
> >> >> >> >> "mov 40+save, %rdi \n\t"
> >> >> >> >> "mov 56+save, %rbp \n\t"
> >> >> >> >> "mov 64+save, %r8 \n\t"
> >> >> >> >> "mov 72+save, %r9 \n\t"
> >> >> >> >> "mov 80+save, %r10  \n\t"
> >> >> >> >> "mov 88+save, %r11 \n\t"
> >> >> >> >> "mov 96+save, %r12 \n\t"
> >> >> >> >> "mov 104+save, %r13 \n\t"
> >> >> >> >> "mov 112+save, %r14 \n\t"
> >> >> >> >> "mov 120+save, %r15 \n\t"
> >> >> >> >>
> >> >> >> >> "test_insn:\n\t"
> >> >> >> >> "in  (%dx),%al\n\t"
> >> >> >> >> ". = . + 31\n\t"
> >> >> >> >> "test_insn_end:\n\t"
> >> >> >> >>
> >> >> >> >> "pushf \n\t"
> >> >> >> >> "pop 136+save \n\t"
> >> >> >> >> "mov %rax, 0+save \n\t"
> >> >> >> >> "mov %rbx, 8+save \n\t"
> >> >> >> >> "mov %rcx, 16+save \n\t"
> >> >> >> >> "mov %rdx, 24+save \n\t"
> >> >> >> >> "mov %rsi, 32+save \n\t"
> >> >> >> >> "mov %rdi, 40+save \n\t"
> >> >> >> >> "mov %rbp, 56+save \n\t"
> >> >> >> >> "mov %r8, 64+save \n\t"
> >> >> >> >> "mov %r9, 72+save \n\t"
> >> >> >> >> "mov %r10, 80+save \n\t"
> >> >> >> >> "mov %r11, 88+save \n\t"
> >> >> >> >> "mov %r12, 96+save \n\t"
> >> >> >> >> "mov %r13, 104+save \n\t"
> >> >> >> >> "mov %r14, 112+save \n\t"
> >> >> >> >> "mov %r15, 120+save \n\t"
> >> >> >> >> "popf \n\t"
> >> >> >> >> "pop %r15; pop %r14 \n\t"
> >> >> >> >> "pop %r13; pop %r12 \n\t"
> >> >> >> >> "pop %r11; pop %r10 \n\t"
> >> >> >> >> "pop %r9; pop %r8 \n\t"
> >> >> >> >> "pop %rbp \n\t"
> >> >> >> >> "pop %rdi; pop %rsi \n\t"
> >> >> >> >> "pop %rdx; pop %rcx \n\t"
> >> >> >> >> "pop %rbx; pop %rax \n\t"
> >> >> >> >>
> >> >> >> >> "ret\n\t"
> >> >> >> >> "save:\n\t"
> >> >> >> >> ". = . + 256\n\t"
> >> >> >> >> ".align 4096\n\t"
> >> >> >> >> "alt_insn_page:\n\t"
> >> >> >> >> ". = . + 4096\n\t"
> >> >> >> >> );
> >> >> >> >>
> >> >> >> >>
> >> >> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
> >> >> >> >> uint8_t *alt_insn, int alt_insn_length)
> >> >> >> >> {
> >> >> >> >>     int i, emul_offset;
> >> >> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
> >> >> >> >>         test_insn[i] = 0x90; // nop
> >> >> >> > Why? Gcc should pad it with nops.
> >> >> >> >
> >> >> >> >>     emul_offset = test_insn - insn_page;
> >> >> >> >>     for (i=0; i<alt_insn_length; i++)
> >> >> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
> >> >> >> >> }
> >> >> >> >>
> >> >> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> >> >> >> >> {
> >> >> >> >>     ulong *cr3 = (ulong *)read_cr3();
> >> >> >> >>     int save_offset = (u8 *)(&save) - insn_page;
> >> >> >> >>
> >> >> >> >>     memset(alt_insn_page, 0x90, 4096);
> >> >> >> > alt_insn_page should contains the same instruction as insn_page except
> >> >> >> > between test_insn and test_insn_end. I do not know how you expect it to
> >> >> >> > work otherwise.
> >> >> >> In my opinion, only the code between test_insn and test_insn_end in
> >> >> >> alt_insn_page need to be set, insn_page will be executed in the guest,
> >> >> >> and when trapping into emulator OS will load alt_insn_page (because of
> >> >> >> invlpg(insn_page)), then return to guest with executing insn_page
> >> >> >> (from TLB).
> >> >> > While before trap the code will likely be executed from insn_page,
> >> >> > but after the trap it is very optimistic to assume that tlb cache
> >> >> > will still contain this virtual address since host will execute quite a
> >> >> > lot of code and can even schedule in the middle, so the TLB will not
> >> >> > contain the address and your test will crash. Even the code before test
> >> >> > instruction can be executed from alt_insn_page if guest is scheduled out
> >> >> > after invlpg() and before it executes every instruction until trapping
> >> >> > one. In your case the test will crash too instead of yielding false positive.
> >> >> >
> >> >> >> I don't know if this is right, but I use this trick in my
> >> >> >> previous patch and it runs well.
> >> >> > Your previous patches always had c3 (ret) after tested instruction on
> >> >> > alt_insn_page.
> >> >> >
> >> >> >>                                  I use "trace-cmd record -e kvm" to
> >> >> >> trace it and found instructions in alt_insn_page are not executed, so
> >> >> >> I suppose that alt_insn_page is not loaded to the right place.
> >> >> > Do you see "in" instruction emulated? Anyway current code is incorrect
> >> >> > since current install_page() implementation cannot handle large pages
> >> >> > and the code is backed up by large pages. You can fix install_page() to
> >> >> > check for that and break large page into small one before installing a
> >> >> > page.
> >> >> Here I have two questions.
> >> >> 1. There's another function called "install_large_page", can it be
> >> >> used to our occasion? I found that this function is not used at all.
> >> > It is used when initial page tables are created.
> >> > See lib/x86/vm.c:setup_mmu_range()
> >> >
> >> >> 2. Why will current version runs well? Do pages allocated dynamically
> >> >> are automatically aligned to 2MB (large page size)?
> >> >>
> >> > No, they are 4K pages.
> >> Thus why dynamically creating insn_page and alt_insn_page with
> >> alloc_page() can get the right result?
> > Probably.
> >
> > --
> >                         Gleb.
> 
> 
> 
> -- 
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 14:13                                   ` Gleb Natapov
@ 2013-06-19 14:20                                     ` 李春奇 <Arthur Chunqi Li>
  0 siblings, 0 replies; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-19 14:20 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Paolo Bonzini, kvm

I use insn_ram the same way origin/master did before. I don't know how
to describe it clearly; I will send a patch later and you can see what
I mean from the code.

Arthur

On Wed, Jun 19, 2013 at 10:13 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Wed, Jun 19, 2013 at 10:01:40PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> I found the final reason! The initial use of init_ram is also used by
>> test_rip_relative(), which will cause conflict. I changed it and
>> everything runs well.
>>
> Not sure what you mean. Your version of test_movabs does not use insn_ram.
>
>> On Wed, Jun 19, 2013 at 8:32 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> > On Wed, Jun 19, 2013 at 08:30:33PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> On Wed, Jun 19, 2013 at 8:26 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> > On Wed, Jun 19, 2013 at 08:18:29PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> On Wed, Jun 19, 2013 at 5:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> > On Wed, Jun 19, 2013 at 09:26:59AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> On Wed, Jun 19, 2013 at 12:44 AM, Gleb Natapov <gleb@redhat.com> wrote:
>> >> >> >> > Send code in a form of a patch.
>> >> >> >> >
>> >> >> >> > On Wed, Jun 19, 2013 at 12:14:13AM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> >> >> >> >> extern u8 insn_page[], insn_page_end[];
>> >> >> >> >> extern u8 test_insn[], test_insn_end[];
>> >> >> >> >> extern u8 alt_insn_page[];
>> >> >> >> >>
>> >> >> >> >> asm(
>> >> >> >> >> ".align 4096\n\t"
>> >> >> >> >> ".global insn_page\n\t"
>> >> >> >> >> ".global insn_page_end\n\t"
>> >> >> >> >> ".global test_insn\n\t"
>> >> >> >> >> ".global test_insn_end\n\t"
>> >> >> >> >> "insn_page:\n\t"
>> >> >> >> >>
>> >> >> >> >> "ret \n\t"
>> >> >> >> >>
>> >> >> >> >> "push %rax; push %rbx\n\t"
>> >> >> >> >> "push %rcx; push %rdx\n\t"
>> >> >> >> >> "push %rsi; push %rdi\n\t"
>> >> >> >> >> "push %rbp\n\t"
>> >> >> >> >> "push %r8; push %r9\n\t"
>> >> >> >> >> "push %r10; push %r11\n\t"
>> >> >> >> >> "push %r12; push %r13\n\t"
>> >> >> >> >> "push %r14; push %r15\n\t"
>> >> >> >> >> "pushf\n\t"
>> >> >> >> >>
>> >> >> >> >> "push 136+save \n\t"
>> >> >> >> >> "popf \n\t"
>> >> >> >> >> "mov 0+save, %rax \n\t"
>> >> >> >> >> "mov 8+save, %rbx \n\t"
>> >> >> >> >> "mov 16+save, %rcx \n\t"
>> >> >> >> >> "mov 24+save, %rdx \n\t"
>> >> >> >> >> "mov 32+save, %rsi \n\t"
>> >> >> >> >> "mov 40+save, %rdi \n\t"
>> >> >> >> >> "mov 56+save, %rbp \n\t"
>> >> >> >> >> "mov 64+save, %r8 \n\t"
>> >> >> >> >> "mov 72+save, %r9 \n\t"
>> >> >> >> >> "mov 80+save, %r10  \n\t"
>> >> >> >> >> "mov 88+save, %r11 \n\t"
>> >> >> >> >> "mov 96+save, %r12 \n\t"
>> >> >> >> >> "mov 104+save, %r13 \n\t"
>> >> >> >> >> "mov 112+save, %r14 \n\t"
>> >> >> >> >> "mov 120+save, %r15 \n\t"
>> >> >> >> >>
>> >> >> >> >> "test_insn:\n\t"
>> >> >> >> >> "in  (%dx),%al\n\t"
>> >> >> >> >> ". = . + 31\n\t"
>> >> >> >> >> "test_insn_end:\n\t"
>> >> >> >> >>
>> >> >> >> >> "pushf \n\t"
>> >> >> >> >> "pop 136+save \n\t"
>> >> >> >> >> "mov %rax, 0+save \n\t"
>> >> >> >> >> "mov %rbx, 8+save \n\t"
>> >> >> >> >> "mov %rcx, 16+save \n\t"
>> >> >> >> >> "mov %rdx, 24+save \n\t"
>> >> >> >> >> "mov %rsi, 32+save \n\t"
>> >> >> >> >> "mov %rdi, 40+save \n\t"
>> >> >> >> >> "mov %rbp, 56+save \n\t"
>> >> >> >> >> "mov %r8, 64+save \n\t"
>> >> >> >> >> "mov %r9, 72+save \n\t"
>> >> >> >> >> "mov %r10, 80+save \n\t"
>> >> >> >> >> "mov %r11, 88+save \n\t"
>> >> >> >> >> "mov %r12, 96+save \n\t"
>> >> >> >> >> "mov %r13, 104+save \n\t"
>> >> >> >> >> "mov %r14, 112+save \n\t"
>> >> >> >> >> "mov %r15, 120+save \n\t"
>> >> >> >> >> "popf \n\t"
>> >> >> >> >> "pop %r15; pop %r14 \n\t"
>> >> >> >> >> "pop %r13; pop %r12 \n\t"
>> >> >> >> >> "pop %r11; pop %r10 \n\t"
>> >> >> >> >> "pop %r9; pop %r8 \n\t"
>> >> >> >> >> "pop %rbp \n\t"
>> >> >> >> >> "pop %rdi; pop %rsi \n\t"
>> >> >> >> >> "pop %rdx; pop %rcx \n\t"
>> >> >> >> >> "pop %rbx; pop %rax \n\t"
>> >> >> >> >>
>> >> >> >> >> "ret\n\t"
>> >> >> >> >> "save:\n\t"
>> >> >> >> >> ". = . + 256\n\t"
>> >> >> >> >> ".align 4096\n\t"
>> >> >> >> >> "alt_insn_page:\n\t"
>> >> >> >> >> ". = . + 4096\n\t"
>> >> >> >> >> );
>> >> >> >> >>
>> >> >> >> >>
>> >> >> >> >> static void mk_insn_page(uint8_t *alt_insn_page,
>> >> >> >> >> uint8_t *alt_insn, int alt_insn_length)
>> >> >> >> >> {
>> >> >> >> >>     int i, emul_offset;
>> >> >> >> >>     for (i=1; i<test_insn_end - test_insn; i++)
>> >> >> >> >>         test_insn[i] = 0x90; // nop
>> >> >> >> > Why? Gcc should pad it with nops.
>> >> >> >> >
>> >> >> >> >>     emul_offset = test_insn - insn_page;
>> >> >> >> >>     for (i=0; i<alt_insn_length; i++)
>> >> >> >> >>         alt_insn_page[i+emul_offset] = alt_insn[i];
>> >> >> >> >> }
>> >> >> >> >>
>> >> >> >> >> static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> >> >> >> >> {
>> >> >> >> >>     ulong *cr3 = (ulong *)read_cr3();
>> >> >> >> >>     int save_offset = (u8 *)(&save) - insn_page;
>> >> >> >> >>
>> >> >> >> >>     memset(alt_insn_page, 0x90, 4096);
>> >> >> >> > alt_insn_page should contains the same instruction as insn_page except
>> >> >> >> > between test_insn and test_insn_end. I do not know how you expect it to
>> >> >> >> > work otherwise.
>> >> >> >> In my oponion, only codes between test_insn and test_insn_end in
>> >> >> >> alt_insn_page need to be set, insn_page will be executed in the guest,
>> >> >> >> and when trapping into emulator OS will load alt_insn_page (because of
>> >> >> >> invlpg(insn_page)), then return to guest with executing insn_page
>> >> >> >> (from TLB).
>> >> >> > While before trap the code will likely be executed from insn_page,
>> >> >> > but after the trap it is very optimistic to assume that tlb cache
>> >> >> > will still contain this virtual address since host will execute quite a
>> >> >> > lot of code and can even schedule in the middle, so the TLB will not
>> >> >> > contain the address and your test will crash. Even the code before test
>> >> >> > instruction can be executed from alt_insn_page if guest is scheduled out
>> >> >> > after invlpg() and before it executes every instruction until trapping
>> >> >> > one. In your case the test will crash too instead of yielding false positive.
>> >> >> >
>> >> >> >> I don't know if this is right, but I use this trick in my
>> >> >> >> previous patch and it runs well.
>> >> >> > Your previous patches always had c3 (ret) after tested instruction on
>> >> >> > alt_insn_page.
>> >> >> >
>> >> >> >>                                  I use "trace-cmd record -e kvm" to
>> >> >> >> trace it and found instructions in alt_insn_page are not executed, so
>> >> >> >> I suppose that alt_insn_page is not loaded to the right place.
>> >> >> > Do you see "in" instruction emulated? Anyway current code is incorrect
>> >> >> > since current install_page() implementation cannot handle large pages
>> >> >> > and the code is backed up by large pages. You can fix install_page() to
>> >> >> > check for that and break large page into small one before installing a
>> >> >> > page.
>> >> >> Here I have two questions.
>> >> >> 1. There's another function called "install_large_page", can it be
>> >> >> used to our occasion? I found that this function is not used at all.
>> >> > It is used when initial page tables are created.
>> >> > See lib/x86/vm.c:setup_mmu_range()
>> >> >
>> >> >> 2. Why will current version runs well? Do pages allocated dynamically
>> >> >> are automatically aligned to 2MB (large page size)?
>> >> >>
>> >> > No, they are 4K pages.
>> >> Thus why dynamically creating insn_page and alt_insn_page with
>> >> alloc_page() can get the right result?
>> > Probably.
>> >
>> > --
>> >                         Gleb.
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
>
> --
>                         Gleb.



-- 
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
@ 2013-06-19 15:00 Arthur Chunqi Li
  2013-06-19 15:07 ` 李春奇 <Arthur Chunqi Li>
  2013-06-20  8:48 ` Gleb Natapov
  0 siblings, 2 replies; 54+ messages in thread
From: Arthur Chunqi Li @ 2013-06-19 15:00 UTC (permalink / raw)
  To: kvm; +Cc: gleb, pbonzini, jan.kiszka, Arthur Chunqi Li

Add a function trap_emulator to run an instruction in the emulator.
Set inregs first (%rax is invalid because it is used as the return
address), put the instruction bytes in alt_insn and call the function
with alt_insn_length. Get the results in outregs.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 mode change 100644 => 100755 x86/emulator.c

diff --git a/x86/emulator.c b/x86/emulator.c
old mode 100644
new mode 100755
index 96576e5..48d45c8
--- a/x86/emulator.c
+++ b/x86/emulator.c
@@ -11,6 +11,15 @@ int fails, tests;
 
 static int exceptions;
 
+struct regs {
+	u64 rax, rbx, rcx, rdx;
+	u64 rsi, rdi, rsp, rbp;
+	u64 r8, r9, r10, r11;
+	u64 r12, r13, r14, r15;
+	u64 rip, rflags;
+};
+struct regs inregs, outregs, save;
+
 void report(const char *name, int result)
 {
 	++tests;
@@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
     report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
 }
 
+#define INSN_SAVE 			\
+	"ret\n\t"				\
+	"pushf\n\t"			\
+	"push 136+save \n\t"		\
+	"popf \n\t"			\
+	"xchg %rax, 0+save \n\t"		\
+	"xchg %rbx, 8+save \n\t"		\
+	"xchg %rcx, 16+save \n\t"		\
+	"xchg %rdx, 24+save \n\t"		\
+	"xchg %rsi, 32+save \n\t"		\
+	"xchg %rdi, 40+save \n\t"		\
+	"xchg %rsp, 48+save \n\t"		\
+	"xchg %rbp, 56+save \n\t"		\
+	"xchg %r8, 64+save \n\t"		\
+	"xchg %r9, 72+save \n\t"		\
+	"xchg %r10, 80+save \n\t"		\
+	"xchg %r11, 88+save \n\t"		\
+	"xchg %r12, 96+save \n\t"		\
+	"xchg %r13, 104+save \n\t"		\
+	"xchg %r14, 112+save \n\t"		\
+	"xchg %r15, 120+save \n\t"		\
+
+#define INSN_RESTORE			\
+	"xchg %rax, 0+save \n\t"		\
+	"xchg %rbx, 8+save \n\t"		\
+	"xchg %rcx, 16+save \n\t"		\
+	"xchg %rdx, 24+save \n\t"		\
+	"xchg %rsi, 32+save \n\t"		\
+	"xchg %rdi, 40+save \n\t"		\
+	"xchg %rsp, 48+save \n\t"		\
+	"xchg %rbp, 56+save \n\t"		\
+	"xchg %r8, 64+save \n\t"		\
+	"xchg %r9, 72+save \n\t"		\
+	"xchg %r10, 80+save \n\t"		\
+	"xchg %r11, 88+save \n\t"		\
+	"xchg %r12, 96+save \n\t"		\
+	"xchg %r13, 104+save \n\t"		\
+	"xchg %r14, 112+save \n\t"		\
+	"xchg %r15, 120+save \n\t"		\
+	"pushf \n\t"			\
+	"pop 136+save \n\t"		\
+	"popf \n\t"			\
+	"ret \n\t"				\
+
+#define INSN_TRAP			\
+	"in  (%dx),%al\n\t"			\
+	". = . + 31\n\t"			\
+
+asm(
+	".align 4096\n\t"
+	"insn_page:\n\t"
+	INSN_SAVE
+	"test_insn:\n\t"
+	INSN_TRAP
+	"test_insn_end:\n\t"
+	INSN_RESTORE
+	"insn_page_end:\n\t"
+	".align 4096\n\t"
+
+	"alt_insn_page:\n\t"
+	INSN_SAVE
+	"alt_test_insn:\n\t"
+	INSN_TRAP
+	"alt_test_insn_end:\n\t"
+	INSN_RESTORE
+	"alt_insn_page_end:\n\t"
+	".align 4096\n\t"
+);
+
+static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
+{
+	ulong *cr3 = (ulong *)read_cr3();
+	void *insn_ram;
+	int i;
+	extern u8 insn_page[], test_insn[], test_insn_end[];
+	extern u8 alt_insn_page[], alt_test_insn[];
+
+	insn_ram = vmap(virt_to_phys(insn_page), 4096);
+	for (i=1; i<test_insn_end - test_insn; i++)
+		alt_test_insn[i] = test_insn[i] = 0x90; // nop
+	for (i=0; i<alt_insn_length; i++)
+		alt_test_insn[i] = alt_insn[i];
+	for(;i<test_insn_end - test_insn; i++)
+		alt_test_insn[i] = 0x90; // nop
+	save = inregs;
+
+	// Load the code TLB with insn_page, but point the page tables at
+	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
+	// This will make the CPU trap on the insn_page instruction but the
+	// hypervisor will see alt_insn_page.
+	install_page(cr3, virt_to_phys(insn_page), insn_ram);
+	invlpg(insn_ram);
+	// Load code TLB
+	asm volatile("call *%0" : : "r"(insn_ram));
+	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
+	// Trap, let hypervisor emulate at alt_insn_page
+	asm volatile("call *%0": : "r"(insn_ram+1));
+
+	outregs = save;
+}
+
 static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
 {
     ++exceptions;
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 15:00 Arthur Chunqi Li
@ 2013-06-19 15:07 ` 李春奇 <Arthur Chunqi Li>
  2013-06-19 16:03   ` Gleb Natapov
  2013-06-20  8:48 ` Gleb Natapov
  1 sibling, 1 reply; 54+ messages in thread
From: 李春奇 <Arthur Chunqi Li> @ 2013-06-19 15:07 UTC (permalink / raw)
  To: kvm; +Cc: Gleb Natapov, Paolo Bonzini, Jan Kiszka, Arthur Chunqi Li

Hi Gleb,
This version can set %rsp before trapping into the emulator, because
insn_page and alt_insn_page are statically defined and their position
relative to (save) is fixed during execution.

As a result, the test_mmx_movq_mf test case needs to pre-define its own
stack; that change is in the next patch.

In this version, insn_ram is initially mapped to insn_page, and then
every call into insn_page/alt_insn_page is made via insn_ram. This trick
works well, but I don't know why my previous version caused an error.

Arthur.
On Wed, Jun 19, 2013 at 11:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax is invalid because it is used as return
> address), put instruction codec in alt_insn and call func with
> alt_insn_length. Get results in outregs.
>
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 110 insertions(+)
>  mode change 100644 => 100755 x86/emulator.c
>
> diff --git a/x86/emulator.c b/x86/emulator.c
> old mode 100644
> new mode 100755
> index 96576e5..48d45c8
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -11,6 +11,15 @@ int fails, tests;
>
>  static int exceptions;
>
> +struct regs {
> +       u64 rax, rbx, rcx, rdx;
> +       u64 rsi, rdi, rsp, rbp;
> +       u64 r8, r9, r10, r11;
> +       u64 r12, r13, r14, r15;
> +       u64 rip, rflags;
> +};
> +struct regs inregs, outregs, save;
> +
>  void report(const char *name, int result)
>  {
>         ++tests;
> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>  }
>
> +#define INSN_SAVE                      \
> +       "ret\n\t"                               \
> +       "pushf\n\t"                     \
> +       "push 136+save \n\t"            \
> +       "popf \n\t"                     \
> +       "xchg %rax, 0+save \n\t"                \
> +       "xchg %rbx, 8+save \n\t"                \
> +       "xchg %rcx, 16+save \n\t"               \
> +       "xchg %rdx, 24+save \n\t"               \
> +       "xchg %rsi, 32+save \n\t"               \
> +       "xchg %rdi, 40+save \n\t"               \
> +       "xchg %rsp, 48+save \n\t"               \
> +       "xchg %rbp, 56+save \n\t"               \
> +       "xchg %r8, 64+save \n\t"                \
> +       "xchg %r9, 72+save \n\t"                \
> +       "xchg %r10, 80+save \n\t"               \
> +       "xchg %r11, 88+save \n\t"               \
> +       "xchg %r12, 96+save \n\t"               \
> +       "xchg %r13, 104+save \n\t"              \
> +       "xchg %r14, 112+save \n\t"              \
> +       "xchg %r15, 120+save \n\t"              \
> +
> +#define INSN_RESTORE                   \
> +       "xchg %rax, 0+save \n\t"                \
> +       "xchg %rbx, 8+save \n\t"                \
> +       "xchg %rcx, 16+save \n\t"               \
> +       "xchg %rdx, 24+save \n\t"               \
> +       "xchg %rsi, 32+save \n\t"               \
> +       "xchg %rdi, 40+save \n\t"               \
> +       "xchg %rsp, 48+save \n\t"               \
> +       "xchg %rbp, 56+save \n\t"               \
> +       "xchg %r8, 64+save \n\t"                \
> +       "xchg %r9, 72+save \n\t"                \
> +       "xchg %r10, 80+save \n\t"               \
> +       "xchg %r11, 88+save \n\t"               \
> +       "xchg %r12, 96+save \n\t"               \
> +       "xchg %r13, 104+save \n\t"              \
> +       "xchg %r14, 112+save \n\t"              \
> +       "xchg %r15, 120+save \n\t"              \
> +       "pushf \n\t"                    \
> +       "pop 136+save \n\t"             \
> +       "popf \n\t"                     \
> +       "ret \n\t"                              \
> +
> +#define INSN_TRAP                      \
> +       "in  (%dx),%al\n\t"                     \
> +       ". = . + 31\n\t"                        \
> +
> +asm(
> +       ".align 4096\n\t"
> +       "insn_page:\n\t"
> +       INSN_SAVE
> +       "test_insn:\n\t"
> +       INSN_TRAP
> +       "test_insn_end:\n\t"
> +       INSN_RESTORE
> +       "insn_page_end:\n\t"
> +       ".align 4096\n\t"
> +
> +       "alt_insn_page:\n\t"
> +       INSN_SAVE
> +       "alt_test_insn:\n\t"
> +       INSN_TRAP
> +       "alt_test_insn_end:\n\t"
> +       INSN_RESTORE
> +       "alt_insn_page_end:\n\t"
> +       ".align 4096\n\t"
> +);
> +
> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> +{
> +       ulong *cr3 = (ulong *)read_cr3();
> +       void *insn_ram;
> +       int i;
> +       extern u8 insn_page[], test_insn[], test_insn_end[];
> +       extern u8 alt_insn_page[], alt_test_insn[];
> +
> +       insn_ram = vmap(virt_to_phys(insn_page), 4096);
> +       for (i=1; i<test_insn_end - test_insn; i++)
> +               alt_test_insn[i] = test_insn[i] = 0x90; // nop
> +       for (i=0; i<alt_insn_length; i++)
> +               alt_test_insn[i] = alt_insn[i];
> +       for(;i<test_insn_end - test_insn; i++)
> +               alt_test_insn[i] = 0x90; // nop
> +       save = inregs;
> +
> +       // Load the code TLB with insn_page, but point the page tables at
> +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> +       // This will make the CPU trap on the insn_page instruction but the
> +       // hypervisor will see alt_insn_page.
> +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
> +       invlpg(insn_ram);
> +       // Load code TLB
> +       asm volatile("call *%0" : : "r"(insn_ram));
> +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> +       // Trap, let hypervisor emulate at alt_insn_page
> +       asm volatile("call *%0": : "r"(insn_ram+1));
> +
> +       outregs = save;
> +}
> +
>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>  {
>      ++exceptions;
> --
> 1.7.9.5
>



--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 15:07 ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-19 16:03   ` Gleb Natapov
  2013-06-19 17:48     ` Gmail
  2013-06-20  8:29     ` Paolo Bonzini
  0 siblings, 2 replies; 54+ messages in thread
From: Gleb Natapov @ 2013-06-19 16:03 UTC (permalink / raw)
  To: 李春奇 <Arthur Chunqi Li>
  Cc: kvm, Paolo Bonzini, Jan Kiszka

On Wed, Jun 19, 2013 at 11:07:18PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> Hi Gleb,
> This version can set %rsp before trapping into emulator, because
> insn_page and alt_insn_page is statically defined and their relative
> position to (save) is fixed during execution.
> 
The position of the code is not fixed during execution, since you execute
it from a virtual address obtained dynamically by vmap(), and that address
is definitely different from the one the code was compiled for. But if
you look at the code that the compiler actually produces, you will see that
it uses an absolute address to access "save", and this is why it works. I
wonder why the compiler decided to use an absolute address this time, Paolo?

> In this way, test case of test_mmx_movq_mf needs to pre-define its own
> stack, this change is in the next patch.
> 
> In this version, insn_ram is initially mapped to insn_page and them
> each call to insn_page/alt_insn_page are all via insn_ram. This trick
> runs well but I don't know why my previous version causes error.
> 
Because the previous version tried to use install_page() on a region
mapped with large pages, and the function does not know how to handle that.

> Arthur.
> On Wed, Jun 19, 2013 at 11:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> > Add a function trap_emulator to run an instruction in emulator.
> > Set inregs first (%rax is invalid because it is used as return
> > address), put instruction codec in alt_insn and call func with
> > alt_insn_length. Get results in outregs.
> >
> > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> > ---
> >  x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 110 insertions(+)
> >  mode change 100644 => 100755 x86/emulator.c
> >
> > diff --git a/x86/emulator.c b/x86/emulator.c
> > old mode 100644
> > new mode 100755
> > index 96576e5..48d45c8
> > --- a/x86/emulator.c
> > +++ b/x86/emulator.c
> > @@ -11,6 +11,15 @@ int fails, tests;
> >
> >  static int exceptions;
> >
> > +struct regs {
> > +       u64 rax, rbx, rcx, rdx;
> > +       u64 rsi, rdi, rsp, rbp;
> > +       u64 r8, r9, r10, r11;
> > +       u64 r12, r13, r14, r15;
> > +       u64 rip, rflags;
> > +};
> > +struct regs inregs, outregs, save;
> > +
> >  void report(const char *name, int result)
> >  {
> >         ++tests;
> > @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
> >      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
> >  }
> >
> > +#define INSN_SAVE                      \
> > +       "ret\n\t"                               \
> > +       "pushf\n\t"                     \
> > +       "push 136+save \n\t"            \
> > +       "popf \n\t"                     \
> > +       "xchg %rax, 0+save \n\t"                \
> > +       "xchg %rbx, 8+save \n\t"                \
> > +       "xchg %rcx, 16+save \n\t"               \
> > +       "xchg %rdx, 24+save \n\t"               \
> > +       "xchg %rsi, 32+save \n\t"               \
> > +       "xchg %rdi, 40+save \n\t"               \
> > +       "xchg %rsp, 48+save \n\t"               \
> > +       "xchg %rbp, 56+save \n\t"               \
> > +       "xchg %r8, 64+save \n\t"                \
> > +       "xchg %r9, 72+save \n\t"                \
> > +       "xchg %r10, 80+save \n\t"               \
> > +       "xchg %r11, 88+save \n\t"               \
> > +       "xchg %r12, 96+save \n\t"               \
> > +       "xchg %r13, 104+save \n\t"              \
> > +       "xchg %r14, 112+save \n\t"              \
> > +       "xchg %r15, 120+save \n\t"              \
> > +
> > +#define INSN_RESTORE                   \
> > +       "xchg %rax, 0+save \n\t"                \
> > +       "xchg %rbx, 8+save \n\t"                \
> > +       "xchg %rcx, 16+save \n\t"               \
> > +       "xchg %rdx, 24+save \n\t"               \
> > +       "xchg %rsi, 32+save \n\t"               \
> > +       "xchg %rdi, 40+save \n\t"               \
> > +       "xchg %rsp, 48+save \n\t"               \
> > +       "xchg %rbp, 56+save \n\t"               \
> > +       "xchg %r8, 64+save \n\t"                \
> > +       "xchg %r9, 72+save \n\t"                \
> > +       "xchg %r10, 80+save \n\t"               \
> > +       "xchg %r11, 88+save \n\t"               \
> > +       "xchg %r12, 96+save \n\t"               \
> > +       "xchg %r13, 104+save \n\t"              \
> > +       "xchg %r14, 112+save \n\t"              \
> > +       "xchg %r15, 120+save \n\t"              \
> > +       "pushf \n\t"                    \
> > +       "pop 136+save \n\t"             \
> > +       "popf \n\t"                     \
> > +       "ret \n\t"                              \
> > +
> > +#define INSN_TRAP                      \
> > +       "in  (%dx),%al\n\t"                     \
> > +       ". = . + 31\n\t"                        \
> > +
> > +asm(
> > +       ".align 4096\n\t"
> > +       "insn_page:\n\t"
> > +       INSN_SAVE
> > +       "test_insn:\n\t"
> > +       INSN_TRAP
> > +       "test_insn_end:\n\t"
> > +       INSN_RESTORE
> > +       "insn_page_end:\n\t"
> > +       ".align 4096\n\t"
> > +
> > +       "alt_insn_page:\n\t"
> > +       INSN_SAVE
> > +       "alt_test_insn:\n\t"
> > +       INSN_TRAP
> > +       "alt_test_insn_end:\n\t"
> > +       INSN_RESTORE
> > +       "alt_insn_page_end:\n\t"
> > +       ".align 4096\n\t"
> > +);
> > +
> > +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> > +{
> > +       ulong *cr3 = (ulong *)read_cr3();
> > +       void *insn_ram;
> > +       int i;
> > +       extern u8 insn_page[], test_insn[], test_insn_end[];
> > +       extern u8 alt_insn_page[], alt_test_insn[];
> > +
> > +       insn_ram = vmap(virt_to_phys(insn_page), 4096);
> > +       for (i=1; i<test_insn_end - test_insn; i++)
> > +               alt_test_insn[i] = test_insn[i] = 0x90; // nop
> > +       for (i=0; i<alt_insn_length; i++)
> > +               alt_test_insn[i] = alt_insn[i];
> > +       for(;i<test_insn_end - test_insn; i++)
> > +               alt_test_insn[i] = 0x90; // nop
> > +       save = inregs;
> > +
> > +       // Load the code TLB with insn_page, but point the page tables at
> > +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> > +       // This will make the CPU trap on the insn_page instruction but the
> > +       // hypervisor will see alt_insn_page.
> > +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
> > +       invlpg(insn_ram);
> > +       // Load code TLB
> > +       asm volatile("call *%0" : : "r"(insn_ram));
> > +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> > +       // Trap, let hypervisor emulate at alt_insn_page
> > +       asm volatile("call *%0": : "r"(insn_ram+1));
> > +
> > +       outregs = save;
> > +}
> > +
> >  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
> >  {
> >      ++exceptions;
> > --
> > 1.7.9.5
> >
> 
> 
> 
> --
> Arthur Chunqi Li
> Department of Computer Science
> School of EECS
> Peking University
> Beijing, China

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 16:03   ` Gleb Natapov
@ 2013-06-19 17:48     ` Gmail
  2013-06-20  5:42       ` Gleb Natapov
  2013-06-20  8:29     ` Paolo Bonzini
  1 sibling, 1 reply; 54+ messages in thread
From: Gmail @ 2013-06-19 17:48 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm, Paolo Bonzini, Jan Kiszka


在 2013-6-20,0:03,Gleb Natapov <gleb@redhat.com> 写道:

> On Wed, Jun 19, 2013 at 11:07:18PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> Hi Gleb,
>> This version can set %rsp before trapping into emulator, because
>> insn_page and alt_insn_page is statically defined and their relative
>> position to (save) is fixed during execution.
> The position of the code is not fixed during execution since you execute
> it from a virtual address obtained dynamically by vmap() and the address
> is definitely different from the one the code was compiled for, but if
> you look at the code that compile actually produce you will see that it
> uses absolute address to access "save" and this is why it works. I
> wounder why compiler decided to use absolute address this time, Paolo?
> 
>> In this way, test case of test_mmx_movq_mf needs to pre-define its own
>> stack, this change is in the next patch.
>> 
>> In this version, insn_ram is initially mapped to insn_page and them
>> each call to insn_page/alt_insn_page are all via insn_ram. This trick
>> runs well but I don't know why my previous version causes error.
> Because previous version tried to use install_page() on a large page
> mapped region and the function does not know how to handle that.
I don't quite understand what you mean here. What is the difference between a large page and a 4k page in this test case? Maybe I don't understand how install_pte() differs between 4k pages and 2m pages.
> 
>> Arthur.
>> On Wed, Jun 19, 2013 at 11:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
>>> Add a function trap_emulator to run an instruction in emulator.
>>> Set inregs first (%rax is invalid because it is used as return
>>> address), put instruction codec in alt_insn and call func with
>>> alt_insn_length. Get results in outregs.
>>> 
>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>>> ---
>>> x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>> 1 file changed, 110 insertions(+)
>>> mode change 100644 => 100755 x86/emulator.c
>>> 
>>> diff --git a/x86/emulator.c b/x86/emulator.c
>>> old mode 100644
>>> new mode 100755
>>> index 96576e5..48d45c8
>>> --- a/x86/emulator.c
>>> +++ b/x86/emulator.c
>>> @@ -11,6 +11,15 @@ int fails, tests;
>>> 
>>> static int exceptions;
>>> 
>>> +struct regs {
>>> +       u64 rax, rbx, rcx, rdx;
>>> +       u64 rsi, rdi, rsp, rbp;
>>> +       u64 r8, r9, r10, r11;
>>> +       u64 r12, r13, r14, r15;
>>> +       u64 rip, rflags;
>>> +};
>>> +struct regs inregs, outregs, save;
>>> +
>>> void report(const char *name, int result)
>>> {
>>>       ++tests;
>>> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
>>>    report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>>> }
>>> 
>>> +#define INSN_SAVE                      \
>>> +       "ret\n\t"                               \
>>> +       "pushf\n\t"                     \
>>> +       "push 136+save \n\t"            \
>>> +       "popf \n\t"                     \
>>> +       "xchg %rax, 0+save \n\t"                \
>>> +       "xchg %rbx, 8+save \n\t"                \
>>> +       "xchg %rcx, 16+save \n\t"               \
>>> +       "xchg %rdx, 24+save \n\t"               \
>>> +       "xchg %rsi, 32+save \n\t"               \
>>> +       "xchg %rdi, 40+save \n\t"               \
>>> +       "xchg %rsp, 48+save \n\t"               \
>>> +       "xchg %rbp, 56+save \n\t"               \
>>> +       "xchg %r8, 64+save \n\t"                \
>>> +       "xchg %r9, 72+save \n\t"                \
>>> +       "xchg %r10, 80+save \n\t"               \
>>> +       "xchg %r11, 88+save \n\t"               \
>>> +       "xchg %r12, 96+save \n\t"               \
>>> +       "xchg %r13, 104+save \n\t"              \
>>> +       "xchg %r14, 112+save \n\t"              \
>>> +       "xchg %r15, 120+save \n\t"              \
>>> +
>>> +#define INSN_RESTORE                   \
>>> +       "xchg %rax, 0+save \n\t"                \
>>> +       "xchg %rbx, 8+save \n\t"                \
>>> +       "xchg %rcx, 16+save \n\t"               \
>>> +       "xchg %rdx, 24+save \n\t"               \
>>> +       "xchg %rsi, 32+save \n\t"               \
>>> +       "xchg %rdi, 40+save \n\t"               \
>>> +       "xchg %rsp, 48+save \n\t"               \
>>> +       "xchg %rbp, 56+save \n\t"               \
>>> +       "xchg %r8, 64+save \n\t"                \
>>> +       "xchg %r9, 72+save \n\t"                \
>>> +       "xchg %r10, 80+save \n\t"               \
>>> +       "xchg %r11, 88+save \n\t"               \
>>> +       "xchg %r12, 96+save \n\t"               \
>>> +       "xchg %r13, 104+save \n\t"              \
>>> +       "xchg %r14, 112+save \n\t"              \
>>> +       "xchg %r15, 120+save \n\t"              \
>>> +       "pushf \n\t"                    \
>>> +       "pop 136+save \n\t"             \
>>> +       "popf \n\t"                     \
>>> +       "ret \n\t"                              \
>>> +
>>> +#define INSN_TRAP                      \
>>> +       "in  (%dx),%al\n\t"                     \
>>> +       ". = . + 31\n\t"                        \
>>> +
>>> +asm(
>>> +       ".align 4096\n\t"
>>> +       "insn_page:\n\t"
>>> +       INSN_SAVE
>>> +       "test_insn:\n\t"
>>> +       INSN_TRAP
>>> +       "test_insn_end:\n\t"
>>> +       INSN_RESTORE
>>> +       "insn_page_end:\n\t"
>>> +       ".align 4096\n\t"
>>> +
>>> +       "alt_insn_page:\n\t"
>>> +       INSN_SAVE
>>> +       "alt_test_insn:\n\t"
>>> +       INSN_TRAP
>>> +       "alt_test_insn_end:\n\t"
>>> +       INSN_RESTORE
>>> +       "alt_insn_page_end:\n\t"
>>> +       ".align 4096\n\t"
>>> +);
>>> +
>>> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>>> +{
>>> +       ulong *cr3 = (ulong *)read_cr3();
>>> +       void *insn_ram;
>>> +       int i;
>>> +       extern u8 insn_page[], test_insn[], test_insn_end[];
>>> +       extern u8 alt_insn_page[], alt_test_insn[];
>>> +
>>> +       insn_ram = vmap(virt_to_phys(insn_page), 4096);
>>> +       for (i=1; i<test_insn_end - test_insn; i++)
>>> +               alt_test_insn[i] = test_insn[i] = 0x90; // nop
>>> +       for (i=0; i<alt_insn_length; i++)
>>> +               alt_test_insn[i] = alt_insn[i];
>>> +       for(;i<test_insn_end - test_insn; i++)
>>> +               alt_test_insn[i] = 0x90; // nop
>>> +       save = inregs;
>>> +
>>> +       // Load the code TLB with insn_page, but point the page tables at
>>> +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>>> +       // This will make the CPU trap on the insn_page instruction but the
>>> +       // hypervisor will see alt_insn_page.
>>> +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
>>> +       invlpg(insn_ram);
>>> +       // Load code TLB
>>> +       asm volatile("call *%0" : : "r"(insn_ram));
>>> +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>>> +       // Trap, let hypervisor emulate at alt_insn_page
>>> +       asm volatile("call *%0": : "r"(insn_ram+1));
>>> +
>>> +       outregs = save;
>>> +}
>>> +
>>> static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>>> {
>>>    ++exceptions;
>>> --
>>> 1.7.9.5
>> 
>> 
>> 
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
> 
> --
>           Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 17:48     ` Gmail
@ 2013-06-20  5:42       ` Gleb Natapov
  0 siblings, 0 replies; 54+ messages in thread
From: Gleb Natapov @ 2013-06-20  5:42 UTC (permalink / raw)
  To: Gmail; +Cc: kvm, Paolo Bonzini, Jan Kiszka

On Thu, Jun 20, 2013 at 01:48:39AM +0800, Gmail wrote:
> 
> 在 2013-6-20,0:03,Gleb Natapov <gleb@redhat.com> 写道:
> 
> > On Wed, Jun 19, 2013 at 11:07:18PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> Hi Gleb,
> >> This version can set %rsp before trapping into emulator, because
> >> insn_page and alt_insn_page is statically defined and their relative
> >> position to (save) is fixed during execution.
> > The position of the code is not fixed during execution since you execute
> > it from a virtual address obtained dynamically by vmap() and the address
> > is definitely different from the one the code was compiled for, but if
> > > you look at the code that the compiler actually produces you will see that it
> > > uses an absolute address to access "save" and this is why it works. I
> > > wonder why the compiler decided to use an absolute address this time, Paolo?
> > 
> >> In this way, test case of test_mmx_movq_mf needs to pre-define its own
> >> stack, this change is in the next patch.
> >> 
> >> In this version, insn_ram is initially mapped to insn_page and them
> >> each call to insn_page/alt_insn_page are all via insn_ram. This trick
> >> runs well but I don't know why my previous version causes error.
> > Because previous version tried to use install_page() on a large page
> > mapped region and the function does not know how to handle that.
> I don't quite understand what you mean here. What is the differences between large page and 4k page in this test case?
Test assumes 4k page size.

> Maybe I don't understand the differences of install_pte() with 4k page and 2m pages.
Maybe. You cannot install a 4k page in place of a 2m page before breaking
the latter into 512 4k pages.

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 16:03   ` Gleb Natapov
  2013-06-19 17:48     ` Gmail
@ 2013-06-20  8:29     ` Paolo Bonzini
  2013-06-20  8:31       ` Gleb Natapov
  1 sibling, 1 reply; 54+ messages in thread
From: Paolo Bonzini @ 2013-06-20  8:29 UTC (permalink / raw)
  To: Gleb Natapov
  Cc: "李春奇 <Arthur Chunqi Li>", kvm,
	Jan Kiszka

Il 19/06/2013 18:03, Gleb Natapov ha scritto:
> On Wed, Jun 19, 2013 at 11:07:18PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
>> Hi Gleb,
>> This version can set %rsp before trapping into emulator, because
>> insn_page and alt_insn_page is statically defined and their relative
>> position to (save) is fixed during execution.
>>
> The position of the code is not fixed during execution since you execute
> it from a virtual address obtained dynamically by vmap() and the address
> is definitely different from the one the code was compiled for, but if
> you look at the code that the compiler actually produces you will see that it
> uses an absolute address to access "save" and this is why it works. I
> wonder why the compiler decided to use an absolute address this time, Paolo?

Because he's using assembly with operands that he wrote himself.  Before
he was using "m" and the compiler decided to express the memory operand
as "save(%rip)".

The assembler then emits different opcodes (of course) and also
different relocations.  In the current code, it tells the linker to
place an absolute address.  In the previous one, it tells the linker to
place a delta from %rip.

Paolo

>> In this way, test case of test_mmx_movq_mf needs to pre-define its own
>> stack, this change is in the next patch.
>>
>> In this version, insn_ram is initially mapped to insn_page and them
>> each call to insn_page/alt_insn_page are all via insn_ram. This trick
>> runs well but I don't know why my previous version causes error.
>>
> Because previous version tried to use install_page() on a large page
> mapped region and the function does not know how to handle that.
> 
>> Arthur.
>> On Wed, Jun 19, 2013 at 11:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
>>> Add a function trap_emulator to run an instruction in emulator.
>>> Set inregs first (%rax is invalid because it is used as return
>>> address), put instruction codec in alt_insn and call func with
>>> alt_insn_length. Get results in outregs.
>>>
>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>>> ---
>>>  x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 110 insertions(+)
>>>  mode change 100644 => 100755 x86/emulator.c
>>>
>>> diff --git a/x86/emulator.c b/x86/emulator.c
>>> old mode 100644
>>> new mode 100755
>>> index 96576e5..48d45c8
>>> --- a/x86/emulator.c
>>> +++ b/x86/emulator.c
>>> @@ -11,6 +11,15 @@ int fails, tests;
>>>
>>>  static int exceptions;
>>>
>>> +struct regs {
>>> +       u64 rax, rbx, rcx, rdx;
>>> +       u64 rsi, rdi, rsp, rbp;
>>> +       u64 r8, r9, r10, r11;
>>> +       u64 r12, r13, r14, r15;
>>> +       u64 rip, rflags;
>>> +};
>>> +struct regs inregs, outregs, save;
>>> +
>>>  void report(const char *name, int result)
>>>  {
>>>         ++tests;
>>> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
>>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>>>  }
>>>
>>> +#define INSN_SAVE                      \
>>> +       "ret\n\t"                               \
>>> +       "pushf\n\t"                     \
>>> +       "push 136+save \n\t"            \
>>> +       "popf \n\t"                     \
>>> +       "xchg %rax, 0+save \n\t"                \
>>> +       "xchg %rbx, 8+save \n\t"                \
>>> +       "xchg %rcx, 16+save \n\t"               \
>>> +       "xchg %rdx, 24+save \n\t"               \
>>> +       "xchg %rsi, 32+save \n\t"               \
>>> +       "xchg %rdi, 40+save \n\t"               \
>>> +       "xchg %rsp, 48+save \n\t"               \
>>> +       "xchg %rbp, 56+save \n\t"               \
>>> +       "xchg %r8, 64+save \n\t"                \
>>> +       "xchg %r9, 72+save \n\t"                \
>>> +       "xchg %r10, 80+save \n\t"               \
>>> +       "xchg %r11, 88+save \n\t"               \
>>> +       "xchg %r12, 96+save \n\t"               \
>>> +       "xchg %r13, 104+save \n\t"              \
>>> +       "xchg %r14, 112+save \n\t"              \
>>> +       "xchg %r15, 120+save \n\t"              \
>>> +
>>> +#define INSN_RESTORE                   \
>>> +       "xchg %rax, 0+save \n\t"                \
>>> +       "xchg %rbx, 8+save \n\t"                \
>>> +       "xchg %rcx, 16+save \n\t"               \
>>> +       "xchg %rdx, 24+save \n\t"               \
>>> +       "xchg %rsi, 32+save \n\t"               \
>>> +       "xchg %rdi, 40+save \n\t"               \
>>> +       "xchg %rsp, 48+save \n\t"               \
>>> +       "xchg %rbp, 56+save \n\t"               \
>>> +       "xchg %r8, 64+save \n\t"                \
>>> +       "xchg %r9, 72+save \n\t"                \
>>> +       "xchg %r10, 80+save \n\t"               \
>>> +       "xchg %r11, 88+save \n\t"               \
>>> +       "xchg %r12, 96+save \n\t"               \
>>> +       "xchg %r13, 104+save \n\t"              \
>>> +       "xchg %r14, 112+save \n\t"              \
>>> +       "xchg %r15, 120+save \n\t"              \
>>> +       "pushf \n\t"                    \
>>> +       "pop 136+save \n\t"             \
>>> +       "popf \n\t"                     \
>>> +       "ret \n\t"                              \
>>> +
>>> +#define INSN_TRAP                      \
>>> +       "in  (%dx),%al\n\t"                     \
>>> +       ". = . + 31\n\t"                        \
>>> +
>>> +asm(
>>> +       ".align 4096\n\t"
>>> +       "insn_page:\n\t"
>>> +       INSN_SAVE
>>> +       "test_insn:\n\t"
>>> +       INSN_TRAP
>>> +       "test_insn_end:\n\t"
>>> +       INSN_RESTORE
>>> +       "insn_page_end:\n\t"
>>> +       ".align 4096\n\t"
>>> +
>>> +       "alt_insn_page:\n\t"
>>> +       INSN_SAVE
>>> +       "alt_test_insn:\n\t"
>>> +       INSN_TRAP
>>> +       "alt_test_insn_end:\n\t"
>>> +       INSN_RESTORE
>>> +       "alt_insn_page_end:\n\t"
>>> +       ".align 4096\n\t"
>>> +);
>>> +
>>> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>>> +{
>>> +       ulong *cr3 = (ulong *)read_cr3();
>>> +       void *insn_ram;
>>> +       int i;
>>> +       extern u8 insn_page[], test_insn[], test_insn_end[];
>>> +       extern u8 alt_insn_page[], alt_test_insn[];
>>> +
>>> +       insn_ram = vmap(virt_to_phys(insn_page), 4096);
>>> +       for (i=1; i<test_insn_end - test_insn; i++)
>>> +               alt_test_insn[i] = test_insn[i] = 0x90; // nop
>>> +       for (i=0; i<alt_insn_length; i++)
>>> +               alt_test_insn[i] = alt_insn[i];
>>> +       for(;i<test_insn_end - test_insn; i++)
>>> +               alt_test_insn[i] = 0x90; // nop
>>> +       save = inregs;
>>> +
>>> +       // Load the code TLB with insn_page, but point the page tables at
>>> +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>>> +       // This will make the CPU trap on the insn_page instruction but the
>>> +       // hypervisor will see alt_insn_page.
>>> +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
>>> +       invlpg(insn_ram);
>>> +       // Load code TLB
>>> +       asm volatile("call *%0" : : "r"(insn_ram));
>>> +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>>> +       // Trap, let hypervisor emulate at alt_insn_page
>>> +       asm volatile("call *%0": : "r"(insn_ram+1));
>>> +
>>> +       outregs = save;
>>> +}
>>> +
>>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>>>  {
>>>      ++exceptions;
>>> --
>>> 1.7.9.5
>>>
>>
>>
>>
>> --
>> Arthur Chunqi Li
>> Department of Computer Science
>> School of EECS
>> Peking University
>> Beijing, China
> 
> --
> 			Gleb.
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-20  8:29     ` Paolo Bonzini
@ 2013-06-20  8:31       ` Gleb Natapov
  0 siblings, 0 replies; 54+ messages in thread
From: Gleb Natapov @ 2013-06-20  8:31 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: "李春奇 <Arthur Chunqi Li>", kvm,
	Jan Kiszka

On Thu, Jun 20, 2013 at 10:29:42AM +0200, Paolo Bonzini wrote:
> Il 19/06/2013 18:03, Gleb Natapov ha scritto:
> > On Wed, Jun 19, 2013 at 11:07:18PM +0800, 李春奇 <Arthur Chunqi Li> wrote:
> >> Hi Gleb,
> >> This version can set %rsp before trapping into emulator, because
> >> insn_page and alt_insn_page is statically defined and their relative
> >> position to (save) is fixed during execution.
> >>
> > The position of the code is not fixed during execution since you execute
> > it from a virtual address obtained dynamically by vmap() and the address
> > is definitely different from the one the code was compiled for, but if
> > you look at the code that the compiler actually produces you will see that it
> > uses an absolute address to access "save" and this is why it works. I
> > wonder why the compiler decided to use an absolute address this time, Paolo?
> 
> Because he's using assembly with operands that he wrote himself.  Before
> he was using "m" and the compiler decided to express the memory operand
> as "save(%rip)".
> 
> The assembler then emits different opcodes (of course) and also
> different relocations.  In the current code, it tells the linker to
> place an absolute address.  In the previous one, it tells the linker to
> place a delta from %rip.
> 
Heh, makes sense. OK, so we will go with that. I will comment on the patch
itself.

> Paolo
> 
> >> In this way, test case of test_mmx_movq_mf needs to pre-define its own
> >> stack, this change is in the next patch.
> >>
> >> In this version, insn_ram is initially mapped to insn_page and them
> >> each call to insn_page/alt_insn_page are all via insn_ram. This trick
> >> runs well but I don't know why my previous version causes error.
> >>
> > Because previous version tried to use install_page() on a large page
> > mapped region and the function does not know how to handle that.
> > 
> >> Arthur.
> >> On Wed, Jun 19, 2013 at 11:00 PM, Arthur Chunqi Li <yzt356@gmail.com> wrote:
> >>> Add a function trap_emulator to run an instruction in emulator.
> >>> Set inregs first (%rax is invalid because it is used as return
> >>> address), put instruction codec in alt_insn and call func with
> >>> alt_insn_length. Get results in outregs.
> >>>
> >>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> >>> ---
> >>>  x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>  1 file changed, 110 insertions(+)
> >>>  mode change 100644 => 100755 x86/emulator.c
> >>>
> >>> diff --git a/x86/emulator.c b/x86/emulator.c
> >>> old mode 100644
> >>> new mode 100755
> >>> index 96576e5..48d45c8
> >>> --- a/x86/emulator.c
> >>> +++ b/x86/emulator.c
> >>> @@ -11,6 +11,15 @@ int fails, tests;
> >>>
> >>>  static int exceptions;
> >>>
> >>> +struct regs {
> >>> +       u64 rax, rbx, rcx, rdx;
> >>> +       u64 rsi, rdi, rsp, rbp;
> >>> +       u64 r8, r9, r10, r11;
> >>> +       u64 r12, r13, r14, r15;
> >>> +       u64 rip, rflags;
> >>> +};
> >>> +struct regs inregs, outregs, save;
> >>> +
> >>>  void report(const char *name, int result)
> >>>  {
> >>>         ++tests;
> >>> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
> >>>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
> >>>  }
> >>>
> >>> +#define INSN_SAVE                      \
> >>> +       "ret\n\t"                               \
> >>> +       "pushf\n\t"                     \
> >>> +       "push 136+save \n\t"            \
> >>> +       "popf \n\t"                     \
> >>> +       "xchg %rax, 0+save \n\t"                \
> >>> +       "xchg %rbx, 8+save \n\t"                \
> >>> +       "xchg %rcx, 16+save \n\t"               \
> >>> +       "xchg %rdx, 24+save \n\t"               \
> >>> +       "xchg %rsi, 32+save \n\t"               \
> >>> +       "xchg %rdi, 40+save \n\t"               \
> >>> +       "xchg %rsp, 48+save \n\t"               \
> >>> +       "xchg %rbp, 56+save \n\t"               \
> >>> +       "xchg %r8, 64+save \n\t"                \
> >>> +       "xchg %r9, 72+save \n\t"                \
> >>> +       "xchg %r10, 80+save \n\t"               \
> >>> +       "xchg %r11, 88+save \n\t"               \
> >>> +       "xchg %r12, 96+save \n\t"               \
> >>> +       "xchg %r13, 104+save \n\t"              \
> >>> +       "xchg %r14, 112+save \n\t"              \
> >>> +       "xchg %r15, 120+save \n\t"              \
> >>> +
> >>> +#define INSN_RESTORE                   \
> >>> +       "xchg %rax, 0+save \n\t"                \
> >>> +       "xchg %rbx, 8+save \n\t"                \
> >>> +       "xchg %rcx, 16+save \n\t"               \
> >>> +       "xchg %rdx, 24+save \n\t"               \
> >>> +       "xchg %rsi, 32+save \n\t"               \
> >>> +       "xchg %rdi, 40+save \n\t"               \
> >>> +       "xchg %rsp, 48+save \n\t"               \
> >>> +       "xchg %rbp, 56+save \n\t"               \
> >>> +       "xchg %r8, 64+save \n\t"                \
> >>> +       "xchg %r9, 72+save \n\t"                \
> >>> +       "xchg %r10, 80+save \n\t"               \
> >>> +       "xchg %r11, 88+save \n\t"               \
> >>> +       "xchg %r12, 96+save \n\t"               \
> >>> +       "xchg %r13, 104+save \n\t"              \
> >>> +       "xchg %r14, 112+save \n\t"              \
> >>> +       "xchg %r15, 120+save \n\t"              \
> >>> +       "pushf \n\t"                    \
> >>> +       "pop 136+save \n\t"             \
> >>> +       "popf \n\t"                     \
> >>> +       "ret \n\t"                              \
> >>> +
> >>> +#define INSN_TRAP                      \
> >>> +       "in  (%dx),%al\n\t"                     \
> >>> +       ". = . + 31\n\t"                        \
> >>> +
> >>> +asm(
> >>> +       ".align 4096\n\t"
> >>> +       "insn_page:\n\t"
> >>> +       INSN_SAVE
> >>> +       "test_insn:\n\t"
> >>> +       INSN_TRAP
> >>> +       "test_insn_end:\n\t"
> >>> +       INSN_RESTORE
> >>> +       "insn_page_end:\n\t"
> >>> +       ".align 4096\n\t"
> >>> +
> >>> +       "alt_insn_page:\n\t"
> >>> +       INSN_SAVE
> >>> +       "alt_test_insn:\n\t"
> >>> +       INSN_TRAP
> >>> +       "alt_test_insn_end:\n\t"
> >>> +       INSN_RESTORE
> >>> +       "alt_insn_page_end:\n\t"
> >>> +       ".align 4096\n\t"
> >>> +);
> >>> +
> >>> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> >>> +{
> >>> +       ulong *cr3 = (ulong *)read_cr3();
> >>> +       void *insn_ram;
> >>> +       int i;
> >>> +       extern u8 insn_page[], test_insn[], test_insn_end[];
> >>> +       extern u8 alt_insn_page[], alt_test_insn[];
> >>> +
> >>> +       insn_ram = vmap(virt_to_phys(insn_page), 4096);
> >>> +       for (i=1; i<test_insn_end - test_insn; i++)
> >>> +               alt_test_insn[i] = test_insn[i] = 0x90; // nop
> >>> +       for (i=0; i<alt_insn_length; i++)
> >>> +               alt_test_insn[i] = alt_insn[i];
> >>> +       for(;i<test_insn_end - test_insn; i++)
> >>> +               alt_test_insn[i] = 0x90; // nop
> >>> +       save = inregs;
> >>> +
> >>> +       // Load the code TLB with insn_page, but point the page tables at
> >>> +       // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> >>> +       // This will make the CPU trap on the insn_page instruction but the
> >>> +       // hypervisor will see alt_insn_page.
> >>> +       install_page(cr3, virt_to_phys(insn_page), insn_ram);
> >>> +       invlpg(insn_ram);
> >>> +       // Load code TLB
> >>> +       asm volatile("call *%0" : : "r"(insn_ram));
> >>> +       install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> >>> +       // Trap, let hypervisor emulate at alt_insn_page
> >>> +       asm volatile("call *%0": : "r"(insn_ram+1));
> >>> +
> >>> +       outregs = save;
> >>> +}
> >>> +
> >>>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
> >>>  {
> >>>      ++exceptions;
> >>> --
> >>> 1.7.9.5
> >>>
> >>
> >>
> >>
> >> --
> >> Arthur Chunqi Li
> >> Department of Computer Science
> >> School of EECS
> >> Peking University
> >> Beijing, China
> > 
> > --
> > 			Gleb.
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-19 15:00 Arthur Chunqi Li
  2013-06-19 15:07 ` 李春奇 <Arthur Chunqi Li>
@ 2013-06-20  8:48 ` Gleb Natapov
  2013-06-20  8:58   ` Gmail
  1 sibling, 1 reply; 54+ messages in thread
From: Gleb Natapov @ 2013-06-20  8:48 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, pbonzini, jan.kiszka

On Wed, Jun 19, 2013 at 11:00:56PM +0800, Arthur Chunqi Li wrote:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax is invalid because it is used as return
> address), put instruction codec in alt_insn and call func with
> alt_insn_length. Get results in outregs.
> 
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 110 insertions(+)
>  mode change 100644 => 100755 x86/emulator.c
> 
> diff --git a/x86/emulator.c b/x86/emulator.c
> old mode 100644
> new mode 100755
> index 96576e5..48d45c8
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -11,6 +11,15 @@ int fails, tests;
>  
>  static int exceptions;
>  
> +struct regs {
> +	u64 rax, rbx, rcx, rdx;
> +	u64 rsi, rdi, rsp, rbp;
> +	u64 r8, r9, r10, r11;
> +	u64 r12, r13, r14, r15;
> +	u64 rip, rflags;
> +};
> +struct regs inregs, outregs, save;
> +
>  void report(const char *name, int result)
>  {
>  	++tests;
> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>  }
>  
> +#define INSN_SAVE 			\
No need for all the defines. Put all the code into insn_page, allocate
alt_insn_page dynamically and copy the code there by memcpy.

> +	"ret\n\t"				\
> +	"pushf\n\t"			\
> +	"push 136+save \n\t"		\
> +	"popf \n\t"			\
> +	"xchg %rax, 0+save \n\t"		\
> +	"xchg %rbx, 8+save \n\t"		\
> +	"xchg %rcx, 16+save \n\t"		\
> +	"xchg %rdx, 24+save \n\t"		\
> +	"xchg %rsi, 32+save \n\t"		\
> +	"xchg %rdi, 40+save \n\t"		\
> +	"xchg %rsp, 48+save \n\t"		\
> +	"xchg %rbp, 56+save \n\t"		\
> +	"xchg %r8, 64+save \n\t"		\
> +	"xchg %r9, 72+save \n\t"		\
> +	"xchg %r10, 80+save \n\t"		\
> +	"xchg %r11, 88+save \n\t"		\
> +	"xchg %r12, 96+save \n\t"		\
> +	"xchg %r13, 104+save \n\t"		\
> +	"xchg %r14, 112+save \n\t"		\
> +	"xchg %r15, 120+save \n\t"		\
> +
> +#define INSN_RESTORE			\
> +	"xchg %rax, 0+save \n\t"		\
> +	"xchg %rbx, 8+save \n\t"		\
> +	"xchg %rcx, 16+save \n\t"		\
> +	"xchg %rdx, 24+save \n\t"		\
> +	"xchg %rsi, 32+save \n\t"		\
> +	"xchg %rdi, 40+save \n\t"		\
> +	"xchg %rsp, 48+save \n\t"		\
> +	"xchg %rbp, 56+save \n\t"		\
> +	"xchg %r8, 64+save \n\t"		\
> +	"xchg %r9, 72+save \n\t"		\
> +	"xchg %r10, 80+save \n\t"		\
> +	"xchg %r11, 88+save \n\t"		\
> +	"xchg %r12, 96+save \n\t"		\
> +	"xchg %r13, 104+save \n\t"		\
> +	"xchg %r14, 112+save \n\t"		\
> +	"xchg %r15, 120+save \n\t"		\
> +	"pushf \n\t"			\
> +	"pop 136+save \n\t"		\
> +	"popf \n\t"			\
> +	"ret \n\t"				\
> +
> +#define INSN_TRAP			\
> +	"in  (%dx),%al\n\t"			\
> +	". = . + 31\n\t"			\
If you use ".skip 31, 0x90\n\t" instead, you can drop the loop
that inserts nops below.

> +
> +asm(
> +	".align 4096\n\t"
> +	"insn_page:\n\t"
> +	INSN_SAVE
> +	"test_insn:\n\t"
> +	INSN_TRAP
> +	"test_insn_end:\n\t"
> +	INSN_RESTORE
> +	"insn_page_end:\n\t"
> +	".align 4096\n\t"
> +
> +	"alt_insn_page:\n\t"
> +	INSN_SAVE
> +	"alt_test_insn:\n\t"
> +	INSN_TRAP
> +	"alt_test_insn_end:\n\t"
> +	INSN_RESTORE
> +	"alt_insn_page_end:\n\t"
> +	".align 4096\n\t"
> +);
> +
> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> +{
> +	ulong *cr3 = (ulong *)read_cr3();
> +	void *insn_ram;
> +	int i;
> +	extern u8 insn_page[], test_insn[], test_insn_end[];
> +	extern u8 alt_insn_page[], alt_test_insn[];
> +
> +	insn_ram = vmap(virt_to_phys(insn_page), 4096);
> +	for (i=1; i<test_insn_end - test_insn; i++)
> +		alt_test_insn[i] = test_insn[i] = 0x90; // nop
> +	for (i=0; i<alt_insn_length; i++)
> +		alt_test_insn[i] = alt_insn[i];
> +	for(;i<test_insn_end - test_insn; i++)
> +		alt_test_insn[i] = 0x90; // nop
> +	save = inregs;
> +
> +	// Load the code TLB with insn_page, but point the page tables at
> +	// alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> +	// This will make the CPU trap on the insn_page instruction but the
> +	// hypervisor will see alt_insn_page.
I prefer all the comments to be changed to /**/ style while we are at it.

> +	install_page(cr3, virt_to_phys(insn_page), insn_ram);
> +	invlpg(insn_ram);
> +	// Load code TLB
> +	asm volatile("call *%0" : : "r"(insn_ram));
> +	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> +	// Trap, let hypervisor emulate at alt_insn_page
> +	asm volatile("call *%0": : "r"(insn_ram+1));
> +
> +	outregs = save;
> +}
> +
>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>  {
>      ++exceptions;
> -- 
> 1.7.9.5

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-20  8:48 ` Gleb Natapov
@ 2013-06-20  8:58   ` Gmail
  0 siblings, 0 replies; 54+ messages in thread
From: Gmail @ 2013-06-20  8:58 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm@vger.kernel.org, pbonzini@redhat.com, jan.kiszka@web.de

ok, I will handle all above in the following commit.

Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China

From my iPhone

在 2013-6-20,16:48,Gleb Natapov <gleb@redhat.com> 写道:

> On Wed, Jun 19, 2013 at 11:00:56PM +0800, Arthur Chunqi Li wrote:
>> Add a function trap_emulator to run an instruction in emulator.
>> Set inregs first (%rax is invalid because it is used as return
>> address), put instruction codec in alt_insn and call func with
>> alt_insn_length. Get results in outregs.
>> 
>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> ---
>> x86/emulator.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 110 insertions(+)
>> mode change 100644 => 100755 x86/emulator.c
>> 
>> diff --git a/x86/emulator.c b/x86/emulator.c
>> old mode 100644
>> new mode 100755
>> index 96576e5..48d45c8
>> --- a/x86/emulator.c
>> +++ b/x86/emulator.c
>> @@ -11,6 +11,15 @@ int fails, tests;
>> 
>> static int exceptions;
>> 
>> +struct regs {
>> +    u64 rax, rbx, rcx, rdx;
>> +    u64 rsi, rdi, rsp, rbp;
>> +    u64 r8, r9, r10, r11;
>> +    u64 r12, r13, r14, r15;
>> +    u64 rip, rflags;
>> +};
>> +struct regs inregs, outregs, save;
>> +
>> void report(const char *name, int result)
>> {
>>    ++tests;
>> @@ -685,6 +694,107 @@ static void test_shld_shrd(u32 *mem)
>>     report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>> }
>> 
>> +#define INSN_SAVE            \
> No need for all the defines. Put all the code into insn_page, allocate
> alt_insn_page dynamically and copy the code there by memcpy.
> 
>> +    "ret\n\t"                \
>> +    "pushf\n\t"            \
>> +    "push 136+save \n\t"        \
>> +    "popf \n\t"            \
>> +    "xchg %rax, 0+save \n\t"        \
>> +    "xchg %rbx, 8+save \n\t"        \
>> +    "xchg %rcx, 16+save \n\t"        \
>> +    "xchg %rdx, 24+save \n\t"        \
>> +    "xchg %rsi, 32+save \n\t"        \
>> +    "xchg %rdi, 40+save \n\t"        \
>> +    "xchg %rsp, 48+save \n\t"        \
>> +    "xchg %rbp, 56+save \n\t"        \
>> +    "xchg %r8, 64+save \n\t"        \
>> +    "xchg %r9, 72+save \n\t"        \
>> +    "xchg %r10, 80+save \n\t"        \
>> +    "xchg %r11, 88+save \n\t"        \
>> +    "xchg %r12, 96+save \n\t"        \
>> +    "xchg %r13, 104+save \n\t"        \
>> +    "xchg %r14, 112+save \n\t"        \
>> +    "xchg %r15, 120+save \n\t"        \
>> +
>> +#define INSN_RESTORE            \
>> +    "xchg %rax, 0+save \n\t"        \
>> +    "xchg %rbx, 8+save \n\t"        \
>> +    "xchg %rcx, 16+save \n\t"        \
>> +    "xchg %rdx, 24+save \n\t"        \
>> +    "xchg %rsi, 32+save \n\t"        \
>> +    "xchg %rdi, 40+save \n\t"        \
>> +    "xchg %rsp, 48+save \n\t"        \
>> +    "xchg %rbp, 56+save \n\t"        \
>> +    "xchg %r8, 64+save \n\t"        \
>> +    "xchg %r9, 72+save \n\t"        \
>> +    "xchg %r10, 80+save \n\t"        \
>> +    "xchg %r11, 88+save \n\t"        \
>> +    "xchg %r12, 96+save \n\t"        \
>> +    "xchg %r13, 104+save \n\t"        \
>> +    "xchg %r14, 112+save \n\t"        \
>> +    "xchg %r15, 120+save \n\t"        \
>> +    "pushf \n\t"            \
>> +    "pop 136+save \n\t"        \
>> +    "popf \n\t"            \
>> +    "ret \n\t"                \
>> +
>> +#define INSN_TRAP            \
>> +    "in  (%dx),%al\n\t"            \
>> +    ". = . + 31\n\t"            \
> If you will do ".skip 31, 0x90\n\t" instead you can drop loop
> that inserts nops bellow.
> 
>> +
>> +asm(
>> +    ".align 4096\n\t"
>> +    "insn_page:\n\t"
>> +    INSN_SAVE
>> +    "test_insn:\n\t"
>> +    INSN_TRAP
>> +    "test_insn_end:\n\t"
>> +    INSN_RESTORE
>> +    "insn_page_end:\n\t"
>> +    ".align 4096\n\t"
>> +
>> +    "alt_insn_page:\n\t"
>> +    INSN_SAVE
>> +    "alt_test_insn:\n\t"
>> +    INSN_TRAP
>> +    "alt_test_insn_end:\n\t"
>> +    INSN_RESTORE
>> +    "alt_insn_page_end:\n\t"
>> +    ".align 4096\n\t"
>> +);
>> +
>> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
>> +{
>> +    ulong *cr3 = (ulong *)read_cr3();
>> +    void *insn_ram;
>> +    int i;
>> +    extern u8 insn_page[], test_insn[], test_insn_end[];
>> +    extern u8 alt_insn_page[], alt_test_insn[];
>> +
>> +    insn_ram = vmap(virt_to_phys(insn_page), 4096);
>> +    for (i=1; i<test_insn_end - test_insn; i++)
>> +        alt_test_insn[i] = test_insn[i] = 0x90; // nop
>> +    for (i=0; i<alt_insn_length; i++)
>> +        alt_test_insn[i] = alt_insn[i];
>> +    for(;i<test_insn_end - test_insn; i++)
>> +        alt_test_insn[i] = 0x90; // nop
>> +    save = inregs;
>> +
>> +    // Load the code TLB with insn_page, but point the page tables at
>> +    // alt_insn_page (and keep the data TLB clear, for AMD decode assist).
>> +    // This will make the CPU trap on the insn_page instruction but the
>> +    // hypervisor will see alt_insn_page.
> I prefer all the comments to be changed to /**/ style while we are at it.
> 
>> +    install_page(cr3, virt_to_phys(insn_page), insn_ram);
>> +    invlpg(insn_ram);
>> +    // Load code TLB
>> +    asm volatile("call *%0" : : "r"(insn_ram));
>> +    install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
>> +    // Trap, let hypervisor emulate at alt_insn_page
>> +    asm volatile("call *%0": : "r"(insn_ram+1));
>> +
>> +    outregs = save;
>> +}
>> +
>> static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>> {
>>     ++exceptions;
>> -- 
>> 1.7.9.5
> 
> --
>            Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

* [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
@ 2013-06-20 10:45 Arthur Chunqi Li
  2013-06-20 10:47 ` Jan Kiszka
  2013-06-20 12:32 ` Gleb Natapov
  0 siblings, 2 replies; 54+ messages in thread
From: Arthur Chunqi Li @ 2013-06-20 10:45 UTC (permalink / raw)
  To: kvm; +Cc: gleb, pbonzini, jan.kiszka, Arthur Chunqi Li

Add a function trap_emulator to run an instruction in the emulator.
Set inregs first (%rax is invalid because it is used as the return
address), put the instruction's code bytes in alt_insn and call the
function with alt_insn_length. Get the results in outregs.

Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
 lib/libcflat.h |    1 +
 lib/string.c   |   12 +++++++++
 x86/emulator.c |   78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+)

diff --git a/lib/libcflat.h b/lib/libcflat.h
index 0875bd9..fadc33d 100644
--- a/lib/libcflat.h
+++ b/lib/libcflat.h
@@ -50,6 +50,7 @@ extern int vsnprintf(char *buf, int size, const char *fmt, va_list va);
 extern void puts(const char *s);
 
 extern void *memset(void *s, int c, size_t n);
+extern void *memcpy(void *dest, const void *src, size_t n);
 
 extern long atol(const char *ptr);
 #define ARRAY_SIZE(_a)  (sizeof(_a)/sizeof((_a)[0]))
diff --git a/lib/string.c b/lib/string.c
index 9dc94a1..e798f86 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -42,6 +42,18 @@ void *memset(void *s, int c, size_t n)
     return s;
 }
 
+void *memcpy(void *dest, const void *src, size_t n)
+{
+    size_t i;
+    char *a = dest;
+    char *b = src;
+
+    for (i = 0; i < n; ++i)
+        a[i] = b[i];
+
+    return dest;
+}
+
 long atol(const char *ptr)
 {
     long acc = 0;
diff --git a/x86/emulator.c b/x86/emulator.c
index 96576e5..b3626fa 100644
--- a/x86/emulator.c
+++ b/x86/emulator.c
@@ -11,6 +11,15 @@ int fails, tests;
 
 static int exceptions;
 
+struct regs {
+	u64 rax, rbx, rcx, rdx;
+	u64 rsi, rdi, rsp, rbp;
+	u64 r8, r9, r10, r11;
+	u64 r12, r13, r14, r15;
+	u64 rip, rflags;
+};
+struct regs inregs, outregs, save;
+
 void report(const char *name, int result)
 {
 	++tests;
@@ -685,6 +694,75 @@ static void test_shld_shrd(u32 *mem)
     report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
 }
 
+#define INSN_XCHG_ALL			\
+	"xchg %rax, 0+save \n\t"		\
+	"xchg %rbx, 8+save \n\t"		\
+	"xchg %rcx, 16+save \n\t"		\
+	"xchg %rdx, 24+save \n\t"		\
+	"xchg %rsi, 32+save \n\t"		\
+	"xchg %rdi, 40+save \n\t"		\
+	"xchg %rsp, 48+save \n\t"		\
+	"xchg %rbp, 56+save \n\t"		\
+	"xchg %r8, 64+save \n\t"		\
+	"xchg %r9, 72+save \n\t"		\
+	"xchg %r10, 80+save \n\t"		\
+	"xchg %r11, 88+save \n\t"		\
+	"xchg %r12, 96+save \n\t"		\
+	"xchg %r13, 104+save \n\t"		\
+	"xchg %r14, 112+save \n\t"		\
+	"xchg %r15, 120+save \n\t"		\
+
+asm(
+	".align 4096\n\t"
+	"insn_page:\n\t"
+	"ret\n\t"
+	"pushf\n\t"
+	"push 136+save \n\t"
+	"popf \n\t"
+	INSN_XCHG_ALL
+	"test_insn:\n\t"
+	"in  (%dx),%al\n\t"
+	".skip 31, 0x90\n\t"
+	"test_insn_end:\n\t"
+	INSN_XCHG_ALL
+	"pushf \n\t"
+	"pop 136+save \n\t"
+	"popf \n\t"
+	"ret \n\t"
+	"insn_page_end:\n\t"
+	".align 4096\n\t"
+
+	"alt_insn_page:\n\t"
+	". = . + 4096\n\t"
+	".align 4096\n\t"
+);
+
+static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
+{
+	ulong *cr3 = (ulong *)read_cr3();
+	void *insn_ram;
+	extern u8 insn_page[], test_insn[], alt_insn_page[];
+
+	insn_ram = vmap(virt_to_phys(insn_page), 4096);
+	memcpy(alt_insn_page, test_insn, 4096);
+	memcpy(alt_insn_page + (test_insn - insn_page), alt_insn, alt_insn_length);
+	save = inregs;
+
+	/* Load the code TLB with insn_page, but point the page tables at
+	   alt_insn_page (and keep the data TLB clear, for AMD decode assist).
+	   This will make the CPU trap on the insn_page instruction but the
+	   hypervisor will see alt_insn_page. */
+	install_page(cr3, virt_to_phys(insn_page), insn_ram);
+	invlpg(insn_ram);
+	/* Load code TLB */
+	asm volatile("call *%0" : : "r"(insn_ram));
+	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
+	/* Trap, let hypervisor emulate at alt_insn_page */
+	asm volatile("call *%0": : "r"(insn_ram+1));
+
+	outregs = save;
+}
+
 static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
 {
     ++exceptions;
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-20 10:45 Arthur Chunqi Li
@ 2013-06-20 10:47 ` Jan Kiszka
  2013-06-20 12:32 ` Gleb Natapov
  1 sibling, 0 replies; 54+ messages in thread
From: Jan Kiszka @ 2013-06-20 10:47 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, gleb, pbonzini

[-- Attachment #1: Type: text/plain, Size: 407 bytes --]

On 2013-06-20 12:45, Arthur Chunqi Li wrote:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax is invalid because it is used as return
> address), put instruction codec in alt_insn and call func with
> alt_insn_length. Get results in outregs.

Small hint: You should version your patches to help differentiate the
postings (e.g. [PATCH v3 ...])

Jan



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 263 bytes --]

^ permalink raw reply	[flat|nested] 54+ messages in thread

* Re: [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator
  2013-06-20 10:45 Arthur Chunqi Li
  2013-06-20 10:47 ` Jan Kiszka
@ 2013-06-20 12:32 ` Gleb Natapov
  1 sibling, 0 replies; 54+ messages in thread
From: Gleb Natapov @ 2013-06-20 12:32 UTC (permalink / raw)
  To: Arthur Chunqi Li; +Cc: kvm, pbonzini, jan.kiszka

On Thu, Jun 20, 2013 at 06:45:21PM +0800, Arthur Chunqi Li wrote:
> Add a function trap_emulator to run an instruction in emulator.
> Set inregs first (%rax is invalid because it is used as return
> address), put instruction codec in alt_insn and call func with
> alt_insn_length. Get results in outregs.
> 
Looks good, some comments below.

> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
>  lib/libcflat.h |    1 +
>  lib/string.c   |   12 +++++++++
>  x86/emulator.c |   78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 91 insertions(+)
> 
> diff --git a/lib/libcflat.h b/lib/libcflat.h
> index 0875bd9..fadc33d 100644
> --- a/lib/libcflat.h
> +++ b/lib/libcflat.h
> @@ -50,6 +50,7 @@ extern int vsnprintf(char *buf, int size, const char *fmt, va_list va);
>  extern void puts(const char *s);
>  
>  extern void *memset(void *s, int c, size_t n);
> +extern void *memcpy(void *dest, const void *src, size_t n);
>  
>  extern long atol(const char *ptr);
>  #define ARRAY_SIZE(_a)  (sizeof(_a)/sizeof((_a)[0]))
> diff --git a/lib/string.c b/lib/string.c
> index 9dc94a1..e798f86 100644
> --- a/lib/string.c
> +++ b/lib/string.c
> @@ -42,6 +42,18 @@ void *memset(void *s, int c, size_t n)
>      return s;
>  }
>  
> +void *memcpy(void *dest, const void *src, size_t n)
> +{
> +    size_t i;
> +    char *a = dest;
> +    char *b = src;
> +
> +    for (i = 0; i < n; ++i)
> +        a[i] = b[i];
> +
> +    return dest;
> +}
> +
The memcpy addition should usually be in a separate patch, but for a unit
test it is not a big deal.

>  long atol(const char *ptr)
>  {
>      long acc = 0;
> diff --git a/x86/emulator.c b/x86/emulator.c
> index 96576e5..b3626fa 100644
> --- a/x86/emulator.c
> +++ b/x86/emulator.c
> @@ -11,6 +11,15 @@ int fails, tests;
>  
>  static int exceptions;
>  
> +struct regs {
> +	u64 rax, rbx, rcx, rdx;
> +	u64 rsi, rdi, rsp, rbp;
> +	u64 r8, r9, r10, r11;
> +	u64 r12, r13, r14, r15;
> +	u64 rip, rflags;
> +};
> +struct regs inregs, outregs, save;
> +
>  void report(const char *name, int result)
>  {
>  	++tests;
> @@ -685,6 +694,75 @@ static void test_shld_shrd(u32 *mem)
>      report("shrd (cl)", *mem == ((0x12345678 >> 3) | (5u << 29)));
>  }
>  
> +#define INSN_XCHG_ALL			\
> +	"xchg %rax, 0+save \n\t"		\
> +	"xchg %rbx, 8+save \n\t"		\
> +	"xchg %rcx, 16+save \n\t"		\
> +	"xchg %rdx, 24+save \n\t"		\
> +	"xchg %rsi, 32+save \n\t"		\
> +	"xchg %rdi, 40+save \n\t"		\
> +	"xchg %rsp, 48+save \n\t"		\
> +	"xchg %rbp, 56+save \n\t"		\
> +	"xchg %r8, 64+save \n\t"		\
> +	"xchg %r9, 72+save \n\t"		\
> +	"xchg %r10, 80+save \n\t"		\
> +	"xchg %r11, 88+save \n\t"		\
> +	"xchg %r12, 96+save \n\t"		\
> +	"xchg %r13, 104+save \n\t"		\
> +	"xchg %r14, 112+save \n\t"		\
> +	"xchg %r15, 120+save \n\t"		\
> +
> +asm(
> +	".align 4096\n\t"
> +	"insn_page:\n\t"
> +	"ret\n\t"
> +	"pushf\n\t"
> +	"push 136+save \n\t"
> +	"popf \n\t"
> +	INSN_XCHG_ALL
> +	"test_insn:\n\t"
> +	"in  (%dx),%al\n\t"
> +	".skip 31, 0x90\n\t"
> +	"test_insn_end:\n\t"
> +	INSN_XCHG_ALL
> +	"pushf \n\t"
> +	"pop 136+save \n\t"
> +	"popf \n\t"
> +	"ret \n\t"
> +	"insn_page_end:\n\t"
> +	".align 4096\n\t"
> +
> +	"alt_insn_page:\n\t"
> +	". = . + 4096\n\t"
> +	".align 4096\n\t"
alt_insn_page can be allocated by alloc_page().

> +);
> +
> +static void trap_emulator(uint64_t *mem, uint8_t* alt_insn, int alt_insn_length)
> +{
> +	ulong *cr3 = (ulong *)read_cr3();
> +	void *insn_ram;
> +	extern u8 insn_page[], test_insn[], alt_insn_page[];
> +
> +	insn_ram = vmap(virt_to_phys(insn_page), 4096);
> +	memcpy(alt_insn_page, test_insn, 4096);
> +	memcpy(alt_insn_page + (test_insn - insn_page), alt_insn, alt_insn_length);
> +	save = inregs;
> +
> +	/* Load the code TLB with insn_page, but point the page tables at
> +	   alt_insn_page (and keep the data TLB clear, for AMD decode assist).
> +	   This will make the CPU trap on the insn_page instruction but the
> +	   hypervisor will see alt_insn_page. */
> +	install_page(cr3, virt_to_phys(insn_page), insn_ram);
> +	invlpg(insn_ram);
> +	/* Load code TLB */
> +	asm volatile("call *%0" : : "r"(insn_ram));
> +	install_page(cr3, virt_to_phys(alt_insn_page), insn_ram);
> +	/* Trap, let hypervisor emulate at alt_insn_page */
> +	asm volatile("call *%0": : "r"(insn_ram+1));
> +
> +	outregs = save;
> +}
> +
>  static void advance_rip_by_3_and_note_exception(struct ex_regs *regs)
>  {
>      ++exceptions;
> -- 
> 1.7.9.5

--
			Gleb.

^ permalink raw reply	[flat|nested] 54+ messages in thread

end of thread, other threads:[~2013-06-20 12:32 UTC | newest]

Thread overview: 54+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-06-06 15:24 [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator Arthur Chunqi Li
2013-06-06 15:24 ` [PATCH 2/2] kvm-unit-tests: Change two cases to use trap_emulator Arthur Chunqi Li
2013-06-12 20:51   ` Paolo Bonzini
2013-06-07  2:14 ` [PATCH 1/2] kvm-unit-tests: Add a func to run instruction in emulator 李春奇 <Arthur Chunqi Li>
2013-06-12 20:50 ` Paolo Bonzini
2013-06-13  4:50   ` 李春奇 <Arthur Chunqi Li>
2013-06-13  9:30     ` 李春奇 <Arthur Chunqi Li>
2013-06-13 13:12       ` Paolo Bonzini
2013-06-18 12:45       ` Gleb Natapov
2013-06-18 13:40         ` 李春奇 <Arthur Chunqi Li>
2013-06-18 14:28         ` 李春奇 <Arthur Chunqi Li>
2013-06-18 15:47           ` Gleb Natapov
2013-06-18 15:56             ` 李春奇 <Arthur Chunqi Li>
2013-06-18 16:09               ` Gleb Natapov
2013-06-18 16:14                 ` 李春奇 <Arthur Chunqi Li>
2013-06-18 16:44                   ` Gleb Natapov
2013-06-19  1:26                     ` 李春奇 <Arthur Chunqi Li>
2013-06-19  9:31                       ` Gleb Natapov
2013-06-19 12:18                         ` 李春奇 <Arthur Chunqi Li>
2013-06-19 12:26                           ` Gleb Natapov
2013-06-19 12:30                             ` 李春奇 <Arthur Chunqi Li>
2013-06-19 12:32                               ` Gleb Natapov
2013-06-19 14:01                                 ` 李春奇 <Arthur Chunqi Li>
2013-06-19 14:13                                   ` Gleb Natapov
2013-06-19 14:20                                     ` 李春奇 <Arthur Chunqi Li>
  -- strict thread matches above, loose matches on Subject: below --
2013-06-07  2:31 Arthur Chunqi Li
2013-06-09 11:07 ` Gleb Natapov
2013-06-09 12:44   ` 李春奇 <Arthur Chunqi Li>
2013-06-09 12:49     ` Gleb Natapov
2013-06-09 12:56       ` 李春奇 <Arthur Chunqi Li>
2013-06-09 12:58         ` Gleb Natapov
2013-06-09 13:22       ` 李春奇 <Arthur Chunqi Li>
2013-06-09 14:09         ` Gleb Natapov
2013-06-09 15:23           ` 李春奇 <Arthur Chunqi Li>
2013-06-09 16:00             ` Gleb Natapov
2013-06-09 17:09               ` 李春奇 <Arthur Chunqi Li>
2013-06-09 17:13                 ` Gleb Natapov
2013-06-09 17:28                   ` 李春奇 <Arthur Chunqi Li>
2013-06-09 17:39                     ` Gleb Natapov
2013-06-10 13:38 Arthur Chunqi Li
2013-06-10 17:36 ` Gleb Natapov
2013-06-13 15:16 Arthur Chunqi Li
2013-06-19 15:00 Arthur Chunqi Li
2013-06-19 15:07 ` 李春奇 <Arthur Chunqi Li>
2013-06-19 16:03   ` Gleb Natapov
2013-06-19 17:48     ` Gmail
2013-06-20  5:42       ` Gleb Natapov
2013-06-20  8:29     ` Paolo Bonzini
2013-06-20  8:31       ` Gleb Natapov
2013-06-20  8:48 ` Gleb Natapov
2013-06-20  8:58   ` Gmail
2013-06-20 10:45 Arthur Chunqi Li
2013-06-20 10:47 ` Jan Kiszka
2013-06-20 12:32 ` Gleb Natapov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).