* [PATCH 1/6] allow emulation of syscalls instructions on #UD
2009-06-17 13:50 [PATCH 0/6] add sysenter/syscall emulation for 32bit compat mode Andre Przywara
@ 2009-06-17 13:50 ` Andre Przywara
2009-06-17 13:50 ` [PATCH 2/6] add missing EFLAGS bit definitions Andre Przywara
` (5 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Andre Przywara @ 2009-06-17 13:50 UTC (permalink / raw)
To: avi; +Cc: kvm, Andre Przywara, Christoph Egger, Amit Shah
Add the opcodes for syscall, sysenter and sysexit to the list of instructions
handled by the undefined opcode handler.
Signed-off-by: Christoph Egger <christoph.egger@amd.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
arch/x86/kvm/x86.c | 33 ++++++++++++++++++++++++++-------
1 files changed, 26 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6025e5b..88e159c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2632,14 +2632,33 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);
- /* Reject the instructions other than VMCALL/VMMCALL when
- * try to emulate invalid opcode */
+ /* Only allow emulation of specific instructions on #UD
+ * (namely VMMCALL, sysenter, sysexit, syscall)*/
c = &vcpu->arch.emulate_ctxt.decode;
- if ((emulation_type & EMULTYPE_TRAP_UD) &&
- (!(c->twobyte && c->b == 0x01 &&
- (c->modrm_reg == 0 || c->modrm_reg == 3) &&
- c->modrm_mod == 3 && c->modrm_rm == 1)))
- return EMULATE_FAIL;
+ if (emulation_type & EMULTYPE_TRAP_UD) {
+ if (!c->twobyte)
+ return EMULATE_FAIL;
+ switch (c->b) {
+ case 0x01: /* VMMCALL */
+ if (c->modrm_mod != 3 || c->modrm_rm != 1)
+ return EMULATE_FAIL;
+ break;
+ case 0x34: /* sysenter */
+ case 0x35: /* sysexit */
+ if (c->modrm_mod != 0 || c->modrm_rm != 0)
+ return EMULATE_FAIL;
+ break;
+ case 0x05: /* syscall */
+ if (c->modrm_mod != 0 || c->modrm_rm != 0)
+ return EMULATE_FAIL;
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+
+ if (!(c->modrm_reg == 0 || c->modrm_reg == 3))
+ return EMULATE_FAIL;
+ }
++vcpu->stat.insn_emulation;
if (r) {
--
1.6.1.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/6] add missing EFLAGS bit definitions
2009-06-17 13:50 [PATCH 0/6] add sysenter/syscall emulation for 32bit compat mode Andre Przywara
2009-06-17 13:50 ` [PATCH 1/6] allow emulation of syscalls instructions on #UD Andre Przywara
@ 2009-06-17 13:50 ` Andre Przywara
2009-06-17 13:50 ` [PATCH 3/6] prepare for emulation of syscall instructions Andre Przywara
` (4 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Andre Przywara @ 2009-06-17 13:50 UTC (permalink / raw)
To: avi; +Cc: kvm, Andre Przywara, Christoph Egger, Amit Shah
Signed-off-by: Christoph Egger <christoph.egger@amd.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
arch/x86/kvm/x86_emulate.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 22c765d..e387c83 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -320,8 +320,11 @@ static u32 group2_table[] = {
};
/* EFLAGS bit definitions. */
+#define EFLG_VM (1<<17)
+#define EFLG_RF (1<<16)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
+#define EFLG_IF (1<<9)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
--
1.6.1.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 3/6] prepare for emulation of syscall instructions
2009-06-17 13:50 [PATCH 0/6] add sysenter/syscall emulation for 32bit compat mode Andre Przywara
2009-06-17 13:50 ` [PATCH 1/6] allow emulation of syscalls instructions on #UD Andre Przywara
2009-06-17 13:50 ` [PATCH 2/6] add missing EFLAGS bit definitions Andre Przywara
@ 2009-06-17 13:50 ` Andre Przywara
2009-06-17 13:50 ` [PATCH 4/6] add syscall emulation Andre Przywara
` (3 subsequent siblings)
6 siblings, 0 replies; 14+ messages in thread
From: Andre Przywara @ 2009-06-17 13:50 UTC (permalink / raw)
To: avi; +Cc: kvm, Andre Przywara, Amit Shah, Christoph Egger
Add the flags needed for syscall, sysenter and sysexit to the opcode table.
Catch (but for now ignore) the opcodes in the emulation switch/case.
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Christoph Egger <christoph.egger@amd.com>
---
arch/x86/kvm/x86_emulate.c | 17 +++++++++++++++--
1 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index e387c83..328ccba 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -32,6 +32,8 @@
#include <linux/module.h>
#include <asm/kvm_x86_emulate.h>
+#include "mmu.h" /* for is_long_mode() */
+
/*
* Opcode effective-address decode tables.
* Note that we only emulate instructions that have at least one memory
@@ -209,7 +211,7 @@ static u32 opcode_table[256] = {
static u32 twobyte_table[256] = {
/* 0x00 - 0x0F */
- 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
+ 0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
/* 0x10 - 0x1F */
0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
@@ -217,7 +219,9 @@ static u32 twobyte_table[256] = {
ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x30 - 0x3F */
- ImplicitOps, 0, ImplicitOps, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ ImplicitOps, 0, ImplicitOps, 0,
+ ImplicitOps, ImplicitOps, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x40 - 0x47 */
DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -1988,6 +1992,9 @@ twobyte_insn:
goto cannot_emulate;
}
break;
+ case 0x05: /* syscall */
+ goto cannot_emulate;
+ break;
case 0x06:
emulate_clts(ctxt->vcpu);
c->dst.type = OP_NONE;
@@ -2054,6 +2061,12 @@ twobyte_insn:
rc = X86EMUL_CONTINUE;
c->dst.type = OP_NONE;
break;
+ case 0x34: /* sysenter */
+ goto cannot_emulate;
+ break;
+ case 0x35: /* sysexit */
+ goto cannot_emulate;
+ break;
case 0x40 ... 0x4f: /* cmov */
c->dst.val = c->dst.orig_val = c->src.val;
if (!test_cc(c->b, ctxt->eflags))
--
1.6.1.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 4/6] add syscall emulation
2009-06-17 13:50 [PATCH 0/6] add sysenter/syscall emulation for 32bit compat mode Andre Przywara
` (2 preceding siblings ...)
2009-06-17 13:50 ` [PATCH 3/6] prepare for emulation of syscall instructions Andre Przywara
@ 2009-06-17 13:50 ` Andre Przywara
2009-06-18 8:47 ` Avi Kivity
2009-06-17 13:50 ` [PATCH 5/6] add sysenter emulation Andre Przywara
` (2 subsequent siblings)
6 siblings, 1 reply; 14+ messages in thread
From: Andre Przywara @ 2009-06-17 13:50 UTC (permalink / raw)
To: avi; +Cc: kvm, Andre Przywara, Christoph Egger
Handle #UD intercept of the syscall instruction in 32bit compat mode on
an Intel host.
Setup the segment descriptors for CS and SS and the EIP/ESP registers
according to the manual. Save the RIP and EFLAGS to the correct registers.
Signed-off-by: Christoph Egger <christoph.egger@amd.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
arch/x86/kvm/x86_emulate.c | 89 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 88 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 328ccba..89bd53e 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1397,6 +1397,90 @@ void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
ctxt->interruptibility = mask;
}
+static inline void
+setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
+ struct kvm_segment *cs, struct kvm_segment *ss)
+{
+ memset(cs, 0, sizeof(struct kvm_segment));
+ kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
+ memset(ss, 0, sizeof(struct kvm_segment));
+
+ cs->l = 0; /* will be adjusted later */
+ cs->base = 0; /* flat segment */
+ cs->g = 1; /* 4kb granularity */
+ cs->limit = 0xfffff; /* 4GB limit */
+ cs->type = 0x0b; /* Read, Execute, Accessed */
+ cs->s = 1;
+ cs->dpl = 0; /* will be adjusted later */
+ cs->present = 1;
+ cs->db = 1;
+
+ ss->unusable = 0;
+ ss->base = 0; /* flat segment */
+ ss->limit = 0xfffff; /* 4GB limit */
+ ss->g = 1; /* 4kb granularity */
+ ss->s = 1;
+ ss->type = 0x03; /* Read/Write, Accessed */
+ ss->db = 1; /* 32bit stack segment */
+ ss->dpl = 0;
+ ss->present = 1;
+}
+
+static int
+emulate_syscall(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct kvm_segment cs, ss;
+ u64 msr_data;
+
+ /* syscall is not available in real mode */
+ if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
+ || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
+ return -1;
+
+ setup_syscalls_segments(ctxt, &cs, &ss);
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+ msr_data >>= 32;
+ cs.selector = (u16)(msr_data & 0xfffc);
+ ss.selector = (u16)(msr_data + 8);
+
+ if (is_long_mode(ctxt->vcpu)) {
+ cs.db = 0;
+ cs.l = 1;
+ if (ctxt->mode == X86EMUL_MODE_PROT64) {
+ /* Intel cares about granularity (g bit),
+ * so we don't set the effective limit.
+ */
+ cs.g = 1;
+ cs.limit = 0xffffffff;
+ }
+ }
+ kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+ kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+ c->regs[VCPU_REGS_RCX] = c->eip;
+ if (is_long_mode(ctxt->vcpu)) {
+ c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
+
+ kvm_x86_ops->get_msr(ctxt->vcpu,
+ ctxt->mode == X86EMUL_MODE_PROT64 ?
+ MSR_LSTAR : MSR_CSTAR, &msr_data);
+ c->eip = msr_data;
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
+ ctxt->eflags &= ~(msr_data | EFLG_RF);
+ } else {
+ /* legacy mode */
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+ c->eip = (u32)msr_data;
+
+ ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+ }
+
+ return 0;
+}
+
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
@@ -1993,7 +2077,10 @@ twobyte_insn:
}
break;
case 0x05: /* syscall */
- goto cannot_emulate;
+ if (emulate_syscall(ctxt) == -1)
+ goto cannot_emulate;
+ else
+ goto writeback;
break;
case 0x06:
emulate_clts(ctxt->vcpu);
--
1.6.1.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 4/6] add syscall emulation
2009-06-17 13:50 ` [PATCH 4/6] add syscall emulation Andre Przywara
@ 2009-06-18 8:47 ` Avi Kivity
2009-06-18 10:27 ` Andre Przywara
0 siblings, 1 reply; 14+ messages in thread
From: Avi Kivity @ 2009-06-18 8:47 UTC (permalink / raw)
To: Andre Przywara; +Cc: kvm, Christoph Egger
On 06/17/2009 04:50 PM, Andre Przywara wrote:
>
> +static inline void
> +setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
> + struct kvm_segment *cs, struct kvm_segment *ss)
> +{
> + memset(cs, 0, sizeof(struct kvm_segment));
> + kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
> + memset(ss, 0, sizeof(struct kvm_segment));
> +
> + cs->l = 0; /* will be adjusted later */
> + cs->base = 0; /* flat segment */
> + cs->g = 1; /* 4kb granularity */
> + cs->limit = 0xfffff; /* 4GB limit */
>
...
> + ss->limit = 0xfffff; /* 4GB limit */
>
limit in kvm_segment is expanded, so 4GB is 0xffffffff.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 4/6] add syscall emulation
2009-06-18 8:47 ` Avi Kivity
@ 2009-06-18 10:27 ` Andre Przywara
2009-06-22 8:43 ` Avi Kivity
0 siblings, 1 reply; 14+ messages in thread
From: Andre Przywara @ 2009-06-18 10:27 UTC (permalink / raw)
To: Avi Kivity; +Cc: Christoph Egger, kvm
Avi Kivity wrote:
> On 06/17/2009 04:50 PM, Andre Przywara wrote:
>>
>> +static inline void
>> +setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
>> + struct kvm_segment *cs, struct kvm_segment *ss)
>> +{
>> + memset(cs, 0, sizeof(struct kvm_segment));
>> + kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
>> + memset(ss, 0, sizeof(struct kvm_segment));
>> +
>> + cs->l = 0; /* will be adjusted later */
>> + cs->base = 0; /* flat segment */
>> + cs->g = 1; /* 4kb granularity */
>> + cs->limit = 0xfffff; /* 4GB limit */
>>
> ...
>> + ss->limit = 0xfffff; /* 4GB limit */
>>
>
> limit in kvm_segment is expanded, so 4GB is 0xffffffff.
You are right. This was probably a leftover from this cross vendor
migration g-bit issue.
Besides the fix here, this removes some code in the separate paths, so I
remade patches 4-6/6 and will resend them.
Thanks for spotting this.
Regards,
Andre.
--
Andre Przywara
AMD-OSRC (Dresden)
Tel: x29712
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 4/6] add syscall emulation
2009-06-18 10:27 ` Andre Przywara
@ 2009-06-22 8:43 ` Avi Kivity
0 siblings, 0 replies; 14+ messages in thread
From: Avi Kivity @ 2009-06-22 8:43 UTC (permalink / raw)
To: Andre Przywara; +Cc: Christoph Egger, kvm
On 06/18/2009 01:27 PM, Andre Przywara wrote:
>> limit in kvm_segment is expanded, so 4GB is 0xffffffff.
>
> You are right. This was probably a leftover from this cross vendor
> migration g-bit issue.
> Besides the fix here, this removes some code in the separate paths, so I
> remade patches 4-6/6 and will resend them.
>
Applied the new patchset; thanks.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 5/6] add sysenter emulation
2009-06-17 13:50 [PATCH 0/6] add sysenter/syscall emulation for 32bit compat mode Andre Przywara
` (3 preceding siblings ...)
2009-06-17 13:50 ` [PATCH 4/6] add syscall emulation Andre Przywara
@ 2009-06-17 13:50 ` Andre Przywara
2009-06-17 13:50 ` [PATCH 6/6] add sysexit emulation Andre Przywara
2009-06-18 8:48 ` [PATCH 0/6] add sysenter/syscall emulation for 32bit compat mode Avi Kivity
6 siblings, 0 replies; 14+ messages in thread
From: Andre Przywara @ 2009-06-17 13:50 UTC (permalink / raw)
To: avi; +Cc: kvm, Andre Przywara, Christoph Egger, Amit Shah
Handle #UD intercept of the sysenter instruction in 32bit compat mode on
an AMD host.
Setup the segment descriptors for CS and SS and the EIP/ESP registers
according to the manual.
Signed-off-by: Christoph Egger <christoph.egger@amd.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
arch/x86/kvm/x86_emulate.c | 72 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 71 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 89bd53e..2f62aaa 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1481,6 +1481,73 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
return 0;
}
+static int
+emulate_sysenter(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct kvm_segment cs, ss;
+ u64 msr_data;
+
+ /* inject #UD if LOCK prefix is used */
+ if (c->lock_prefix)
+ return -1;
+
+ /* inject #GP if in real mode or paging is disabled */
+ if (ctxt->mode == X86EMUL_MODE_REAL ||
+ !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+
+ /* XXX sysenter/sysexit have not been tested in 64bit mode.
+ * Therefore, we inject an #UD.
+ */
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ return -1;
+
+ setup_syscalls_segments(ctxt, &cs, &ss);
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+ switch (ctxt->mode) {
+ case X86EMUL_MODE_PROT32:
+ if ((msr_data & 0xfffc) == 0x0) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+ break;
+ case X86EMUL_MODE_PROT64:
+ if (msr_data == 0x0) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+ break;
+ }
+
+ ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+ cs.selector = (u16)msr_data;
+ cs.selector &= ~SELECTOR_RPL_MASK;
+ ss.selector = cs.selector + 8;
+ ss.selector &= ~SELECTOR_RPL_MASK;
+ if (ctxt->mode == X86EMUL_MODE_PROT64
+ || is_long_mode(ctxt->vcpu)) {
+ cs.db = 0;
+ cs.l = 1;
+ cs.limit = 0xffffffff;
+ ss.limit = 0xffffffff;
+ }
+
+ kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+ kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
+ c->eip = msr_data;
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
+ c->regs[VCPU_REGS_RSP] = msr_data;
+
+ return 0;
+}
+
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
@@ -2149,7 +2216,10 @@ twobyte_insn:
c->dst.type = OP_NONE;
break;
case 0x34: /* sysenter */
- goto cannot_emulate;
+ if (emulate_sysenter(ctxt) == -1)
+ goto cannot_emulate;
+ else
+ goto writeback;
break;
case 0x35: /* sysexit */
goto cannot_emulate;
--
1.6.1.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 6/6] add sysexit emulation
2009-06-17 13:50 [PATCH 0/6] add sysenter/syscall emulation for 32bit compat mode Andre Przywara
` (4 preceding siblings ...)
2009-06-17 13:50 ` [PATCH 5/6] add sysenter emulation Andre Przywara
@ 2009-06-17 13:50 ` Andre Przywara
2009-06-18 8:48 ` [PATCH 0/6] add sysenter/syscall emulation for 32bit compat mode Avi Kivity
6 siblings, 0 replies; 14+ messages in thread
From: Andre Przywara @ 2009-06-17 13:50 UTC (permalink / raw)
To: avi; +Cc: kvm, Andre Przywara, Christoph Egger, Amit Shah
Handle #UD intercept of the sysexit instruction in 64bit mode returning to
32bit compat mode on an AMD host.
Setup the segment descriptors for CS and SS and the EIP/ESP registers
according to the manual.
Signed-off-by: Christoph Egger <christoph.egger@amd.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
arch/x86/kvm/x86_emulate.c | 79 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 78 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 2f62aaa..7df05cc 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1548,6 +1548,80 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
return 0;
}
+static int
+emulate_sysexit(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct kvm_segment cs, ss;
+ u64 msr_data;
+ int usermode;
+
+ /* inject #UD if LOCK prefix is used */
+ if (c->lock_prefix)
+ return -1;
+
+ /* inject #GP if in real mode or paging is disabled */
+ if (ctxt->mode == X86EMUL_MODE_REAL
+ || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+
+ /* sysexit must be called from CPL 0 */
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+
+ setup_syscalls_segments(ctxt, &cs, &ss);
+
+ if ((c->rex_prefix & 0x8) != 0x0)
+ usermode = X86EMUL_MODE_PROT64;
+ else
+ usermode = X86EMUL_MODE_PROT32;
+
+ /* We don't care about cs.g/ss.g bits
+ * (= 4kb granularity) so we have to set the effective
+ * limit here or we get a #GP in the guest, otherwise.
+ */
+ cs.limit = 0xffffffff;
+ ss.limit = 0xffffffff;
+
+ cs.dpl = 3;
+ ss.dpl = 3;
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+ switch (usermode) {
+ case X86EMUL_MODE_PROT32:
+ cs.selector = (u16)(msr_data + 16);
+ if ((msr_data & 0xfffc) == 0x0) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+ ss.selector = (u16)(msr_data + 24);
+ break;
+ case X86EMUL_MODE_PROT64:
+ cs.selector = (u16)(msr_data + 32);
+ if (msr_data == 0x0) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+ ss.selector = cs.selector + 8;
+ cs.db = 0;
+ cs.l = 1;
+ break;
+ }
+ cs.selector |= SELECTOR_RPL_MASK;
+ ss.selector |= SELECTOR_RPL_MASK;
+
+ kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+ kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+ c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
+ c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
+
+ return 0;
+}
+
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
@@ -2222,7 +2296,10 @@ twobyte_insn:
goto writeback;
break;
case 0x35: /* sysexit */
- goto cannot_emulate;
+ if (emulate_sysexit(ctxt) == -1)
+ goto cannot_emulate;
+ else
+ goto writeback;
break;
case 0x40 ... 0x4f: /* cmov */
c->dst.val = c->dst.orig_val = c->src.val;
--
1.6.1.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 0/6] add sysenter/syscall emulation for 32bit compat mode
2009-06-17 13:50 [PATCH 0/6] add sysenter/syscall emulation for 32bit compat mode Andre Przywara
` (5 preceding siblings ...)
2009-06-17 13:50 ` [PATCH 6/6] add sysexit emulation Andre Przywara
@ 2009-06-18 8:48 ` Avi Kivity
2009-06-18 10:56 ` [PATCH 4/6 v2] add syscall emulation Andre Przywara
6 siblings, 1 reply; 14+ messages in thread
From: Avi Kivity @ 2009-06-18 8:48 UTC (permalink / raw)
To: Andre Przywara; +Cc: kvm, Amit Shah, Christoph Egger
On 06/17/2009 04:50 PM, Andre Przywara wrote:
> sysenter/sysexit are not supported on AMD's 32bit compat mode, whereas
> syscall is not supported on Intel's 32bit compat mode. To allow cross
> vendor migration we emulate the missing instructions by setting up the
> processor state accordingly.
> The sysenter code was originally sketched by Amit Shah, it was completed,
> debugged, syscall added and made-to-work by Christoph Egger and polished
> up by Andre Przywara.
> Please note that sysret does not need to be emulated, because it will be
> executed in 64bit mode and returning to 32bit compat mode works on Intel.
>
> This has been tested with GETPIDs in a tight loop in compat mode on both
> Intel and AMD boxes. Additionally a 32-bit userland was booted under a
> 64-bit kernel and then cross-vendor migrated.
>
> Please apply or comment ;-)
>
Looks good except for the limit thing I pointed out. If the correct
thing is to change it to 0xffffffff, let me know and I'll do it in
place, instead of resending the whole thing.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 4/6 v2] add syscall emulation
2009-06-18 8:48 ` [PATCH 0/6] add sysenter/syscall emulation for 32bit compat mode Avi Kivity
@ 2009-06-18 10:56 ` Andre Przywara
2009-06-18 10:56 ` [PATCH 5/6 v2] add sysenter emulation Andre Przywara
0 siblings, 1 reply; 14+ messages in thread
From: Andre Przywara @ 2009-06-18 10:56 UTC (permalink / raw)
To: avi; +Cc: kvm, Andre Przywara, Christoph Egger
Handle #UD intercept of the syscall instruction in 32bit compat mode on
an Intel host.
Setup the segment descriptors for CS and SS and the EIP/ESP registers
according to the manual. Save the RIP and EFLAGS to the correct registers.
Signed-off-by: Christoph Egger <christoph.egger@amd.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
arch/x86/kvm/x86_emulate.c | 82 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 81 insertions(+), 1 deletions(-)
Avi,
these are the new versions of patch 4-6/6. If you need delta patches, tell me.
Thanks,
Andre.
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 328ccba..d0a51c4 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1397,6 +1397,83 @@ void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
ctxt->interruptibility = mask;
}
+static inline void
+setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
+ struct kvm_segment *cs, struct kvm_segment *ss)
+{
+ memset(cs, 0, sizeof(struct kvm_segment));
+ kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
+ memset(ss, 0, sizeof(struct kvm_segment));
+
+ cs->l = 0; /* will be adjusted later */
+ cs->base = 0; /* flat segment */
+ cs->g = 1; /* 4kb granularity */
+ cs->limit = 0xffffffff; /* 4GB limit */
+ cs->type = 0x0b; /* Read, Execute, Accessed */
+ cs->s = 1;
+ cs->dpl = 0; /* will be adjusted later */
+ cs->present = 1;
+ cs->db = 1;
+
+ ss->unusable = 0;
+ ss->base = 0; /* flat segment */
+ ss->limit = 0xffffffff; /* 4GB limit */
+ ss->g = 1; /* 4kb granularity */
+ ss->s = 1;
+ ss->type = 0x03; /* Read/Write, Accessed */
+ ss->db = 1; /* 32bit stack segment */
+ ss->dpl = 0;
+ ss->present = 1;
+}
+
+static int
+emulate_syscall(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct kvm_segment cs, ss;
+ u64 msr_data;
+
+ /* syscall is not available in real mode */
+ if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
+ || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
+ return -1;
+
+ setup_syscalls_segments(ctxt, &cs, &ss);
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+ msr_data >>= 32;
+ cs.selector = (u16)(msr_data & 0xfffc);
+ ss.selector = (u16)(msr_data + 8);
+
+ if (is_long_mode(ctxt->vcpu)) {
+ cs.db = 0;
+ cs.l = 1;
+ }
+ kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+ kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+ c->regs[VCPU_REGS_RCX] = c->eip;
+ if (is_long_mode(ctxt->vcpu)) {
+ c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
+
+ kvm_x86_ops->get_msr(ctxt->vcpu,
+ ctxt->mode == X86EMUL_MODE_PROT64 ?
+ MSR_LSTAR : MSR_CSTAR, &msr_data);
+ c->eip = msr_data;
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
+ ctxt->eflags &= ~(msr_data | EFLG_RF);
+ } else {
+ /* legacy mode */
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
+ c->eip = (u32)msr_data;
+
+ ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+ }
+
+ return 0;
+}
+
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
@@ -1993,7 +2070,10 @@ twobyte_insn:
}
break;
case 0x05: /* syscall */
- goto cannot_emulate;
+ if (emulate_syscall(ctxt) == -1)
+ goto cannot_emulate;
+ else
+ goto writeback;
break;
case 0x06:
emulate_clts(ctxt->vcpu);
--
1.6.1.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 5/6 v2] add sysenter emulation
2009-06-18 10:56 ` [PATCH 4/6 v2] add syscall emulation Andre Przywara
@ 2009-06-18 10:56 ` Andre Przywara
2009-06-18 10:56 ` [PATCH 6/6 v2] add sysexit emulation Andre Przywara
0 siblings, 1 reply; 14+ messages in thread
From: Andre Przywara @ 2009-06-18 10:56 UTC (permalink / raw)
To: avi; +Cc: kvm, Andre Przywara, Christoph Egger, Amit Shah
Handle #UD intercept of the sysenter instruction in 32bit compat mode on
an AMD host.
Setup the segment descriptors for CS and SS and the EIP/ESP registers
according to the manual.
Signed-off-by: Christoph Egger <christoph.egger@amd.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
arch/x86/kvm/x86_emulate.c | 70 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 69 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d0a51c4..fdf75f6 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1474,6 +1474,71 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
return 0;
}
+static int
+emulate_sysenter(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct kvm_segment cs, ss;
+ u64 msr_data;
+
+ /* inject #UD if LOCK prefix is used */
+ if (c->lock_prefix)
+ return -1;
+
+ /* inject #GP if in real mode or paging is disabled */
+ if (ctxt->mode == X86EMUL_MODE_REAL ||
+ !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+
+ /* XXX sysenter/sysexit have not been tested in 64bit mode.
+ * Therefore, we inject an #UD.
+ */
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ return -1;
+
+ setup_syscalls_segments(ctxt, &cs, &ss);
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+ switch (ctxt->mode) {
+ case X86EMUL_MODE_PROT32:
+ if ((msr_data & 0xfffc) == 0x0) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+ break;
+ case X86EMUL_MODE_PROT64:
+ if (msr_data == 0x0) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+ break;
+ }
+
+ ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
+ cs.selector = (u16)msr_data;
+ cs.selector &= ~SELECTOR_RPL_MASK;
+ ss.selector = cs.selector + 8;
+ ss.selector &= ~SELECTOR_RPL_MASK;
+ if (ctxt->mode == X86EMUL_MODE_PROT64
+ || is_long_mode(ctxt->vcpu)) {
+ cs.db = 0;
+ cs.l = 1;
+ }
+
+ kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+ kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
+ c->eip = msr_data;
+
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
+ c->regs[VCPU_REGS_RSP] = msr_data;
+
+ return 0;
+}
+
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
@@ -2142,7 +2207,10 @@ twobyte_insn:
c->dst.type = OP_NONE;
break;
case 0x34: /* sysenter */
- goto cannot_emulate;
+ if (emulate_sysenter(ctxt) == -1)
+ goto cannot_emulate;
+ else
+ goto writeback;
break;
case 0x35: /* sysexit */
goto cannot_emulate;
--
1.6.1.3
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 6/6 v2] add sysexit emulation
2009-06-18 10:56 ` [PATCH 5/6 v2] add sysenter emulation Andre Przywara
@ 2009-06-18 10:56 ` Andre Przywara
0 siblings, 0 replies; 14+ messages in thread
From: Andre Przywara @ 2009-06-18 10:56 UTC (permalink / raw)
To: avi; +Cc: kvm, Andre Przywara, Christoph Egger, Amit Shah
Handle #UD intercept of the sysexit instruction in 64bit mode returning to
32bit compat mode on an AMD host.
Setup the segment descriptors for CS and SS and the EIP/ESP registers
according to the manual.
Signed-off-by: Christoph Egger <christoph.egger@amd.com>
Signed-off-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
arch/x86/kvm/x86_emulate.c | 72 +++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 71 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index fdf75f6..6849868 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -1539,6 +1539,73 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
return 0;
}
+static int
+emulate_sysexit(struct x86_emulate_ctxt *ctxt)
+{
+ struct decode_cache *c = &ctxt->decode;
+ struct kvm_segment cs, ss;
+ u64 msr_data;
+ int usermode;
+
+ /* inject #UD if LOCK prefix is used */
+ if (c->lock_prefix)
+ return -1;
+
+ /* inject #GP if in real mode or paging is disabled */
+ if (ctxt->mode == X86EMUL_MODE_REAL
+ || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+
+ /* sysexit must be called from CPL 0 */
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+
+ setup_syscalls_segments(ctxt, &cs, &ss);
+
+ if ((c->rex_prefix & 0x8) != 0x0)
+ usermode = X86EMUL_MODE_PROT64;
+ else
+ usermode = X86EMUL_MODE_PROT32;
+
+ cs.dpl = 3;
+ ss.dpl = 3;
+ kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
+ switch (usermode) {
+ case X86EMUL_MODE_PROT32:
+ cs.selector = (u16)(msr_data + 16);
+ if ((msr_data & 0xfffc) == 0x0) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+ ss.selector = (u16)(msr_data + 24);
+ break;
+ case X86EMUL_MODE_PROT64:
+ cs.selector = (u16)(msr_data + 32);
+ if (msr_data == 0x0) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return -1;
+ }
+ ss.selector = cs.selector + 8;
+ cs.db = 0;
+ cs.l = 1;
+ break;
+ }
+ cs.selector |= SELECTOR_RPL_MASK;
+ ss.selector |= SELECTOR_RPL_MASK;
+
+ kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
+ kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
+
+ c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
+ c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
+
+ return 0;
+}
+
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
@@ -2213,7 +2280,10 @@ twobyte_insn:
goto writeback;
break;
case 0x35: /* sysexit */
- goto cannot_emulate;
+ if (emulate_sysexit(ctxt) == -1)
+ goto cannot_emulate;
+ else
+ goto writeback;
break;
case 0x40 ... 0x4f: /* cmov */
c->dst.val = c->dst.orig_val = c->src.val;
--
1.6.1.3
^ permalink raw reply related [flat|nested] 14+ messages in thread