* [RFC][PATCH v2] VMX: Invalid guest state emulation
@ 2008-08-13 23:58 Mohammed Gamal
2008-08-14 7:26 ` Avi Kivity
0 siblings, 1 reply; 6+ messages in thread
From: Mohammed Gamal @ 2008-08-13 23:58 UTC (permalink / raw)
To: kvm; +Cc: avi, riel, andrea, guillaume.thouvenin, laurent.vivier
This patch aims to allow emulation whenever guest state is not valid for VMX operation.
This usually happens in mode switches with guests such as older versions of gfxboot and FreeDOS with HIMEM.
The patch aims to address this issue, it introduces the following:
- A function that invokes the x86 emulator when the guest state is not valid (borrowed from Guillaume Thouvenin's real mode patches)
- A function that checks that guest register state is VMX compliant
- A module parameter that enables these operations. It is disabled by default, in order not to interfere with KVM's normal operation
This version adds the following:
- An "emulation required" flag, which is set on mode switches
- Improved guest state checking functions
- Emulation is done on guest entry, rather than directly on mode switching utilising the emulation flag.
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>
Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Mohammed Gamal <m.gamal005@gmail.com>
---
arch/x86/kvm/vmx.c | 511 +++++++++++++++++++++++++++++++++++++++-------------
1 files changed, 381 insertions(+), 130 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c4510fe..bc23db4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -49,6 +49,9 @@ module_param(flexpriority_enabled, bool, 0);
static int enable_ept = 1;
module_param(enable_ept, bool, 0);
+static int emulate_invalid_guest_state = 0;
+module_param(emulate_invalid_guest_state, bool, 0);
+
struct vmcs {
u32 revision_id;
u32 abort;
@@ -86,6 +89,7 @@ struct vcpu_vmx {
} irq;
} rmode;
int vpid;
+ bool emulation_required;
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -1294,7 +1298,9 @@ static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
static void enter_pmode(struct kvm_vcpu *vcpu)
{
unsigned long flags;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ vmx->emulation_required = 1;
vcpu->arch.rmode.active = 0;
vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
@@ -1311,17 +1317,19 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
update_exception_bitmap(vcpu);
- fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
- fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
- fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
- fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+ if(!emulate_invalid_guest_state) {
+ fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
+ fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
+ fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
+ fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
- vmcs_write16(GUEST_SS_SELECTOR, 0);
- vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
+ vmcs_write16(GUEST_SS_SELECTOR, 0);
+ vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
- vmcs_write16(GUEST_CS_SELECTOR,
- vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
- vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+ vmcs_write16(GUEST_CS_SELECTOR,
+ vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
+ vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+ }
}
static gva_t rmode_tss_base(struct kvm *kvm)
@@ -1351,7 +1359,9 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
static void enter_rmode(struct kvm_vcpu *vcpu)
{
unsigned long flags;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ vmx->emulation_required = 1;
vcpu->arch.rmode.active = 1;
vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
@@ -1373,20 +1383,22 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
update_exception_bitmap(vcpu);
- vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
- vmcs_write32(GUEST_SS_LIMIT, 0xffff);
- vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
+ if(!emulate_invalid_guest_state) {
+ vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
+ vmcs_write32(GUEST_SS_LIMIT, 0xffff);
+ vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
- vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
- vmcs_write32(GUEST_CS_LIMIT, 0xffff);
- if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
- vmcs_writel(GUEST_CS_BASE, 0xf0000);
- vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
+ vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
+ vmcs_write32(GUEST_CS_LIMIT, 0xffff);
+ if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
+ vmcs_writel(GUEST_CS_BASE, 0xf0000);
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
- fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
- fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
- fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
- fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+ fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
+ fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
+ fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
+ fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+ }
kvm_mmu_reset_context(vcpu);
init_rmode(vcpu->kvm);
@@ -1721,6 +1733,206 @@ static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
vmcs_writel(GUEST_GDTR_BASE, dt->base);
}
+static bool rmode_segment_invalid(struct kvm_vcpu *vcpu, int seg)
+{
+ struct kvm_segment var;
+ u32 ar;
+
+ vmx_get_segment(vcpu, &var, seg);
+ ar = vmx_segment_access_rights(&var);
+
+ if (var.limit != 0xffff)
+ return true;
+ if (ar != 0xf3)
+ return true;
+
+ return false;
+}
+
+static bool code_segment_invalid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment cs;
+
+ vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+
+ if (!(cs.type & (AR_TYPE_ACCESSES_MASK|AR_TYPE_CODE_MASK)))
+ return true;
+ if (!cs.s)
+ return true;
+ if ((cs.type <= 0xb) && (cs.type >= 0x8)) {
+ if (cs.dpl != (cs.selector & 3))
+ return true;
+ }
+ if ((cs.type <= 0xf) && (cs.type >= 0xc)) {
+ if (cs.dpl > (cs.selector & 3))
+ return true;
+ }
+ if (!cs.present)
+ return true;
+ if((cs.limit & 0x00000fff) != 0x00000fff) {
+ if(cs.g)
+ return true;
+ }
+ else if(cs.limit & 0xfff00000) {
+ if(!cs.g)
+ return true;
+ }
+ /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
+
+ return false;
+}
+
+static bool stack_segment_invalid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment ss;
+
+ vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
+
+ if ((ss.type != 3) || (ss.type != 7))
+ return true;
+ if (!ss.s)
+ return true;
+ if (ss.dpl != (ss.selector & 3)) /* DPL != RPL */
+ return true;
+ if (!ss.present)
+ return true;
+ if((ss.limit & 0x00000fff) != 0x00000fff) {
+ if(ss.g)
+ return true;
+ }
+ else if(ss.limit & 0xfff00000) {
+ if(!ss.g)
+ return true;
+ }
+
+ return false;
+}
+
+static bool data_segment_invalid(struct kvm_vcpu *vcpu, int seg)
+{
+ struct kvm_segment var;
+
+ vmx_get_segment(vcpu, &var, seg);
+
+ if (!var.s)
+ return true;
+ if (!var.present)
+ return true;
+ if (var.type <= 0xb) {
+ if (var.dpl < (var.selector & 3)) /* DPL < RPL */
+ return true;
+ }
+ if ((var.limit & 0x00000fff) != 0x00000fff) {
+ if(var.g)
+ return true;
+ }
+ else if(var.limit & 0xfff00000) {
+ if(!var.g)
+ return true;
+ }
+ /* TODO: Add other members to kvm_segment_field to allow checking for other access
+ * rights flags
+ */
+ return false;
+}
+
+static bool tr_segment_valid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment tr;
+
+ vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
+
+ if (tr.selector & 2) /* TI = 1 */
+ return true;
+ if((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */
+ return true;
+ if (!tr.present)
+ return true;
+ if ((tr.limit & 0x00000fff) != 0x00000fff) {
+ if(tr.g)
+ return true;
+ }
+ else if (tr.limit & 0xfff00000) {
+ if(!tr.g)
+ return true;
+ }
+
+ return false;
+}
+
+static bool ldtr_segment_valid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment ldtr;
+
+ vmx_get_segment(vcpu, &ldtr, VCPU_SREG_TR);
+
+ if (ldtr.selector & 2) /* TI = 1 */
+ return true;
+ if(ldtr.type != 2)
+ return true;
+ if (!ldtr.present)
+ return true;
+ if ((ldtr.limit & 0x00000fff) != 0x00000fff) {
+ if(ldtr.g)
+ return true;
+ }
+ else if (ldtr.limit & 0xfff00000) {
+ if(!ldtr.g)
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Check if guest state is valid. Returns true if valid, false if
+ * not.
+ * We assume that registers are always usable
+ */
+static bool guest_state_invalid(struct kvm_vcpu *vcpu)
+{
+ /* real mode guest state checks */
+ if (!(vcpu->arch.cr0 & X86_CR0_PE)) {
+ if(rmode_segment_invalid(vcpu, VCPU_SREG_CS))
+ return true;
+ if(rmode_segment_invalid(vcpu, VCPU_SREG_SS))
+ return true;
+ if(rmode_segment_invalid(vcpu, VCPU_SREG_DS))
+ return true;
+ if(rmode_segment_invalid(vcpu, VCPU_SREG_ES))
+ return true;
+ if(rmode_segment_invalid(vcpu, VCPU_SREG_FS))
+ return true;
+ if(rmode_segment_invalid(vcpu, VCPU_SREG_GS))
+ return true;
+ }
+ else { /* protected mode guest state checks */
+ if(code_segment_invalid(vcpu))
+ return true;
+ if(stack_segment_invalid(vcpu))
+ return true;
+ if(data_segment_invalid(vcpu, VCPU_SREG_DS))
+ return true;
+ if(data_segment_invalid(vcpu, VCPU_SREG_ES))
+ return true;
+ if(data_segment_invalid(vcpu, VCPU_SREG_FS))
+ return true;
+ if(data_segment_invalid(vcpu, VCPU_SREG_GS))
+ return true;
+ }
+
+ if (tr_segment_valid(vcpu))
+ return true;
+ if (ldtr_segment_valid(vcpu))
+ return true;
+ /* TODO:
+ * - Add checks on RIP
+ * - Add checks on RFLAGS
+ */
+
+ return false;
+}
+
static int init_rmode_tss(struct kvm *kvm)
{
gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
@@ -2131,6 +2343,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vpid_sync_vcpu_all(vmx);
ret = 0;
+ vmx->emulation_required = 0; //HACK: Don't enable emulation on guest boot
out:
up_read(&vcpu->kvm->slots_lock);
@@ -2708,6 +2921,38 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
+static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ u8 opcodes[4];
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ unsigned long rip = kvm_rip_read(vcpu);
+ unsigned long rip_linear;
+ int err;
+
+ while (guest_state_invalid(vcpu)) {
+ rip_linear = rip + vmx_get_segment_base(vcpu, VCPU_SREG_CS);
+ emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
+ err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
+
+ switch (err) {
+ case EMULATE_DONE:
+ kvm_report_emulation_failure(vcpu, "emulation success");
+ break;
+ case EMULATE_DO_MMIO:
+ kvm_report_emulation_failure(vcpu, "mmio");
+ /* TODO: Handle MMIO */
+ return;
+ default:
+ kvm_report_emulation_failure(vcpu, "emulation failure");
+ return;
+ }
+ }
+
+ /* Guest state should be valid now, no more emulation should be needed */
+ vmx->emulation_required = 0;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -2969,131 +3214,137 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info;
- if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
- vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
- if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
- vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
+ /* Handle invalid guest state instead of entering VMX */
+ if (vmx->emulation_required && emulate_invalid_guest_state)
+ handle_invalid_guest_state(vcpu, kvm_run);
+
+ else {
+ if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
+ vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
+ if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
+ vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
- /*
- * Loading guest fpu may have cleared host cr0.ts
- */
- vmcs_writel(HOST_CR0, read_cr0());
-
- asm(
- /* Store host registers */
- "push %%"R"dx; push %%"R"bp;"
- "push %%"R"cx \n\t"
- "cmp %%"R"sp, %c[host_rsp](%0) \n\t"
- "je 1f \n\t"
- "mov %%"R"sp, %c[host_rsp](%0) \n\t"
- __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
- "1: \n\t"
- /* Check if vmlaunch of vmresume is needed */
- "cmpl $0, %c[launched](%0) \n\t"
- /* Load guest registers. Don't clobber flags. */
- "mov %c[cr2](%0), %%"R"ax \n\t"
- "mov %%"R"ax, %%cr2 \n\t"
- "mov %c[rax](%0), %%"R"ax \n\t"
- "mov %c[rbx](%0), %%"R"bx \n\t"
- "mov %c[rdx](%0), %%"R"dx \n\t"
- "mov %c[rsi](%0), %%"R"si \n\t"
- "mov %c[rdi](%0), %%"R"di \n\t"
- "mov %c[rbp](%0), %%"R"bp \n\t"
+ /*
+ * Loading guest fpu may have cleared host cr0.ts
+ */
+ vmcs_writel(HOST_CR0, read_cr0());
+
+ asm(
+ /* Store host registers */
+ "push %%"R"dx; push %%"R"bp;"
+ "push %%"R"cx \n\t"
+ "cmp %%"R"sp, %c[host_rsp](%0) \n\t"
+ "je 1f \n\t"
+ "mov %%"R"sp, %c[host_rsp](%0) \n\t"
+ __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
+ "1: \n\t"
+ /* Check if vmlaunch of vmresume is needed */
+ "cmpl $0, %c[launched](%0) \n\t"
+ /* Load guest registers. Don't clobber flags. */
+ "mov %c[cr2](%0), %%"R"ax \n\t"
+ "mov %%"R"ax, %%cr2 \n\t"
+ "mov %c[rax](%0), %%"R"ax \n\t"
+ "mov %c[rbx](%0), %%"R"bx \n\t"
+ "mov %c[rdx](%0), %%"R"dx \n\t"
+ "mov %c[rsi](%0), %%"R"si \n\t"
+ "mov %c[rdi](%0), %%"R"di \n\t"
+ "mov %c[rbp](%0), %%"R"bp \n\t"
#ifdef CONFIG_X86_64
- "mov %c[r8](%0), %%r8 \n\t"
- "mov %c[r9](%0), %%r9 \n\t"
- "mov %c[r10](%0), %%r10 \n\t"
- "mov %c[r11](%0), %%r11 \n\t"
- "mov %c[r12](%0), %%r12 \n\t"
- "mov %c[r13](%0), %%r13 \n\t"
- "mov %c[r14](%0), %%r14 \n\t"
- "mov %c[r15](%0), %%r15 \n\t"
+ "mov %c[r8](%0), %%r8 \n\t"
+ "mov %c[r9](%0), %%r9 \n\t"
+ "mov %c[r10](%0), %%r10 \n\t"
+ "mov %c[r11](%0), %%r11 \n\t"
+ "mov %c[r12](%0), %%r12 \n\t"
+ "mov %c[r13](%0), %%r13 \n\t"
+ "mov %c[r14](%0), %%r14 \n\t"
+ "mov %c[r15](%0), %%r15 \n\t"
#endif
- "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */
-
- /* Enter guest mode */
- "jne .Llaunched \n\t"
- __ex(ASM_VMX_VMLAUNCH) "\n\t"
- "jmp .Lkvm_vmx_return \n\t"
- ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
- ".Lkvm_vmx_return: "
- /* Save guest registers, load host registers, keep flags */
- "xchg %0, (%%"R"sp) \n\t"
- "mov %%"R"ax, %c[rax](%0) \n\t"
- "mov %%"R"bx, %c[rbx](%0) \n\t"
- "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t"
- "mov %%"R"dx, %c[rdx](%0) \n\t"
- "mov %%"R"si, %c[rsi](%0) \n\t"
- "mov %%"R"di, %c[rdi](%0) \n\t"
- "mov %%"R"bp, %c[rbp](%0) \n\t"
+ "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */
+
+ /* Enter guest mode */
+ "jne .Llaunched \n\t"
+ __ex(ASM_VMX_VMLAUNCH) "\n\t"
+ "jmp .Lkvm_vmx_return \n\t"
+ ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
+ ".Lkvm_vmx_return: "
+ /* Save guest registers, load host registers, keep flags */
+ "xchg %0, (%%"R"sp) \n\t"
+ "mov %%"R"ax, %c[rax](%0) \n\t"
+ "mov %%"R"bx, %c[rbx](%0) \n\t"
+ "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t"
+ "mov %%"R"dx, %c[rdx](%0) \n\t"
+ "mov %%"R"si, %c[rsi](%0) \n\t"
+ "mov %%"R"di, %c[rdi](%0) \n\t"
+ "mov %%"R"bp, %c[rbp](%0) \n\t"
#ifdef CONFIG_X86_64
- "mov %%r8, %c[r8](%0) \n\t"
- "mov %%r9, %c[r9](%0) \n\t"
- "mov %%r10, %c[r10](%0) \n\t"
- "mov %%r11, %c[r11](%0) \n\t"
- "mov %%r12, %c[r12](%0) \n\t"
- "mov %%r13, %c[r13](%0) \n\t"
- "mov %%r14, %c[r14](%0) \n\t"
- "mov %%r15, %c[r15](%0) \n\t"
+ "mov %%r8, %c[r8](%0) \n\t"
+ "mov %%r9, %c[r9](%0) \n\t"
+ "mov %%r10, %c[r10](%0) \n\t"
+ "mov %%r11, %c[r11](%0) \n\t"
+ "mov %%r12, %c[r12](%0) \n\t"
+ "mov %%r13, %c[r13](%0) \n\t"
+ "mov %%r14, %c[r14](%0) \n\t"
+ "mov %%r15, %c[r15](%0) \n\t"
#endif
- "mov %%cr2, %%"R"ax \n\t"
- "mov %%"R"ax, %c[cr2](%0) \n\t"
-
- "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t"
- "setbe %c[fail](%0) \n\t"
- : : "c"(vmx), "d"((unsigned long)HOST_RSP),
- [launched]"i"(offsetof(struct vcpu_vmx, launched)),
- [fail]"i"(offsetof(struct vcpu_vmx, fail)),
- [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
- [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
- [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
- [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
- [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
- [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
- [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
- [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
+ "mov %%cr2, %%"R"ax \n\t"
+ "mov %%"R"ax, %c[cr2](%0) \n\t"
+
+ "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t"
+ "setbe %c[fail](%0) \n\t"
+ : : "c"(vmx), "d"((unsigned long)HOST_RSP),
+ [launched]"i"(offsetof(struct vcpu_vmx, launched)),
+ [fail]"i"(offsetof(struct vcpu_vmx, fail)),
+ [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
+ [rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
+ [rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
+ [rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
+ [rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
+ [rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
+ [rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
+ [rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
#ifdef CONFIG_X86_64
- [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
- [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
- [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
- [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
- [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
- [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
- [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
- [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
+ [r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
+ [r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
+ [r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
+ [r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
+ [r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
+ [r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
+ [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
+ [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
#endif
- [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
- : "cc", "memory"
- , R"bx", R"di", R"si"
+ [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2))
+ : "cc", "memory"
+ , R"bx", R"di", R"si"
#ifdef CONFIG_X86_64
- , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+ , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
#endif
- );
+ );
- vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
- vcpu->arch.regs_dirty = 0;
+ vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
+ vcpu->arch.regs_dirty = 0;
- vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
- if (vmx->rmode.irq.pending)
- fixup_rmode_irq(vmx);
+ vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+ if (vmx->rmode.irq.pending)
+ fixup_rmode_irq(vmx);
- vcpu->arch.interrupt_window_open =
- (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
- (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
+ vcpu->arch.interrupt_window_open =
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
- asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
- vmx->launched = 1;
+ asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
+ vmx->launched = 1;
- intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- /* We need to handle NMIs before interrupts are enabled */
- if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
- (intr_info & INTR_INFO_VALID_MASK)) {
- KVMTRACE_0D(NMI, vcpu, handler);
- asm("int $2");
- }
+ /* We need to handle NMIs before interrupts are enabled */
+ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
+ (intr_info & INTR_INFO_VALID_MASK)) {
+ KVMTRACE_0D(NMI, vcpu, handler);
+ asm("int $2");
+ }
- vmx_complete_interrupts(vmx);
+ vmx_complete_interrupts(vmx);
+ }
}
#undef R
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [RFC][PATCH v2] VMX: Invalid guest state emulation
2008-08-13 23:58 [RFC][PATCH v2] VMX: Invalid guest state emulation Mohammed Gamal
@ 2008-08-14 7:26 ` Avi Kivity
2008-08-14 17:56 ` Mohammed Gamal
0 siblings, 1 reply; 6+ messages in thread
From: Avi Kivity @ 2008-08-14 7:26 UTC (permalink / raw)
To: Mohammed Gamal; +Cc: kvm, riel, andrea, guillaume.thouvenin, laurent.vivier
Mohammed Gamal wrote:
> This patch aims to allow emulation whenever guest state is not valid for VMX operation.
> This usually happens in mode switches with guests such as older versions of gfxboot and FreeDOS with HIMEM.
>
> The patch aims to address this issue, it introduces the following:
>
> - A function that invokes the x86 emulator when the guest state is not valid (borrowed from Guillaume Thouvenin's real mode patches)
> - A function that checks that guest register state is VMX compliant
> - A module parameter that enables these operations. It is disabled by default, in order not to intervene with KVM's normal operation
>
> This version adds the following:
>
> - An "emulation required" flag, which is set on mode switches
> - Improved guest state checking functions
> - Emulation is done on guest entry, rather than directly on mode switching utilising the emulation flag.
>
> @@ -1294,7 +1298,9 @@ static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
> static void enter_pmode(struct kvm_vcpu *vcpu)
> {
> unsigned long flags;
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> + vmx->emulation_required = 1;
> vcpu->arch.rmode.active = 0;
>
> vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
> @@ -1311,17 +1317,19 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
>
> update_exception_bitmap(vcpu);
>
> - fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
> - fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
> - fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
> - fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
> + if(!emulate_invalid_guest_state) {
> + fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
> + fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
> + fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
> + fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
>
Instead of all this indentation, you can do an 'if
(emulate_invalid_guest_state) return';
> @@ -1351,7 +1359,9 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
> static void enter_rmode(struct kvm_vcpu *vcpu)
> {
> unsigned long flags;
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> + vmx->emulation_required = 1;
> vcpu->arch.rmode.active = 1;
>
> vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
> @@ -1373,20 +1383,22 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
> vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
> update_exception_bitmap(vcpu);
>
> - vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
> - vmcs_write32(GUEST_SS_LIMIT, 0xffff);
> - vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
> + if(!emulate_invalid_guest_state) {
> + vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
> + vmcs_write32(GUEST_SS_LIMIT, 0xffff);
> + vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
>
Again, except you'll have to use a goto. We can later refactor the state
mangling into a separate function.
It's important to keep a complex change like this easy to review.
>
> kvm_mmu_reset_context(vcpu);
> init_rmode(vcpu->kvm);
> @@ -1721,6 +1733,206 @@ static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
> vmcs_writel(GUEST_GDTR_BASE, dt->base);
> }
>
> +static bool rmode_segment_invalid(struct kvm_vcpu *vcpu, int seg)
> +{
> + struct kvm_segment var;
> + u32 ar;
> +
> + vmx_get_segment(vcpu, &var, seg);
> + ar = vmx_segment_access_rights(&var);
> +
> + if (var.limit != 0xffff)
> + return true;
> + if (ar != 0xf3)
> + return true;
>
Need additional check for base == selector << 4.
> +
> +static bool code_segment_invalid(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_segment cs;
> +
> + vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
> +
> + if (!(cs.type & (AR_TYPE_ACCESSES_MASK|AR_TYPE_CODE_MASK)))
> + return true;
>
That doesn't mean what you think it means. It will fail only if both
bits are unset. To check if both bits are set, use (~cs.type &
(MASK1|MASK2)).
> + if ((cs.type <= 0xb) && (cs.type >= 0x8)) {
> + if (cs.dpl != (cs.selector & 3))
> + return true;
> + }
> + if ((cs.type <= 0xf) && (cs.type >= 0xc)) {
> + if (cs.dpl > (cs.selector & 3))
> + return true;
> + }
>
You're looking at the conforming bit, right? If so please use an
explicit bitmask. Also a temporary for the RPL will make the code clearer.
> + if((cs.limit & 0x00000fff) != 0x00000fff) {
> + if(cs.g)
> + return true;
> + }
> + else if(cs.limit & 0xfff00000) {
> + if(!cs.g)
> + return true;
> + }
>
This bit is generic, not cs specific. On the other hand there is no way
the guest can generate an invalid combination, so this can be removed.
> +static bool tr_segment_valid(struct kvm_vcpu *vcpu)
> +{
>
Should be tr_invalid().
I recommend changing the meaning of all the checks to return true on
valid segments, since that's how the specs are written. It will be
easier to validate the code against the docs, and avoids double negatives.
> +static bool ldtr_segment_valid(struct kvm_vcpu *vcpu)
>
invalid
> +{
> + struct kvm_segment ldtr;
> +
> + vmx_get_segment(vcpu, &ldtr, VCPU_SREG_TR);
>
VCPU_SREG_LDTR
> +/*
> + * Check if guest state is valid. Returns true if valid, false if
> + * not.
> + * We assume that registers are always usable
> + */
> +static bool guest_state_invalid(struct kvm_vcpu *vcpu)
> +{
> + /* real mode guest state checks */
> + if (!(vcpu->arch.cr0 & X86_CR0_PE)) {
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_CS))
> + return true;
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_SS))
> + return true;
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_DS))
> + return true;
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_ES))
> + return true;
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_FS))
> + return true;
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_GS))
> + return true;
> + }
> + else { /* protected mode guest state checks */
> + if(code_segment_invalid(vcpu))
> + return true;
> + if(stack_segment_invalid(vcpu))
> + return true;
> + if(data_segment_invalid(vcpu, VCPU_SREG_DS))
> + return true;
> + if(data_segment_invalid(vcpu, VCPU_SREG_ES))
> + return true;
> + if(data_segment_invalid(vcpu, VCPU_SREG_FS))
> + return true;
> + if(data_segment_invalid(vcpu, VCPU_SREG_GS))
> + return true;
> + }
> +
> + if (tr_segment_valid(vcpu))
> + return true;
> + if (ldtr_segment_valid(vcpu))
> + return true;
>
In real mode we aren't interested in tr and ldtr. We provide fake ones
for v8086 mode, but they aren't related to the guest.
>
> +static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
> + struct kvm_run *kvm_run)
> +{
> + u8 opcodes[4];
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + unsigned long rip = kvm_rip_read(vcpu);
> + unsigned long rip_linear;
> + int err;
> +
> + while (guest_state_invalid(vcpu)) {
> + rip_linear = rip + vmx_get_segment_base(vcpu, VCPU_SREG_CS);
> + emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
>
unused.
> + err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
> +
> + switch (err) {
> + case EMULATE_DONE:
> + kvm_report_emulation_failure(vcpu, "emulation success");
>
looks redundant.
> + break;
> + case EMULATE_DO_MMIO:
> + kvm_report_emulation_failure(vcpu, "mmio");
> + /* TODO: Handle MMIO */
> + return;
> + default:
> + kvm_report_emulation_failure(vcpu, "emulation failure");
> + return;
> + }
> + }
> +
>
The loop is dangerous, if the guest is in an infinite loop we'll never
exit the kernel.
Either check for signals and need_reshched, or avoid the loop completely.
> + /* Guest state should be valid now, no more emulation should be needed */
> + vmx->emulation_required = 0;
> +}
> +
> /*
> * The exit handlers return 1 if the exit was handled fully and guest execution
> * may resume. Otherwise they set the kvm_run parameter to indicate what needs
> @@ -2969,131 +3214,137 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> u32 intr_info;
>
> - if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
> - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
> - if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
> - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
> + /* Handle invalid guest state instrad of entering VMX */
> + if (vmx->emulation_required && emulate_invalid_guest_state)
> + handle_invalid_guest_state(vcpu, kvm_run);
> +
> + else {
> + if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
> + vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
> + if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
> + vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
>
I'm very attached to vmx_cpu_run(). Please don't indent it like that.
Also, mind your coding style. 'if' is not a function, there should be a
space after it.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [RFC][PATCH v2] VMX: Invalid guest state emulation
2008-08-14 7:26 ` Avi Kivity
@ 2008-08-14 17:56 ` Mohammed Gamal
2008-08-17 7:38 ` Avi Kivity
0 siblings, 1 reply; 6+ messages in thread
From: Mohammed Gamal @ 2008-08-14 17:56 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, riel, andrea, guillaume.thouvenin, laurent.vivier
[Updated to take coding style and emulation loop issues into consideration]
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>
Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Mohammed Gamal <m.gamal005@gmail.com>
---
arch/x86/kvm/vmx.c | 235 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 235 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c4510fe..897f9f3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -49,6 +49,9 @@ module_param(flexpriority_enabled, bool, 0);
static int enable_ept = 1;
module_param(enable_ept, bool, 0);
+static int emulate_invalid_guest_state = 0;
+module_param(emulate_invalid_guest_state, bool, 0);
+
struct vmcs {
u32 revision_id;
u32 abort;
@@ -86,6 +89,7 @@ struct vcpu_vmx {
} irq;
} rmode;
int vpid;
+ bool emulation_required;
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -1294,7 +1298,9 @@ static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
static void enter_pmode(struct kvm_vcpu *vcpu)
{
unsigned long flags;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ vmx->emulation_required = 1;
vcpu->arch.rmode.active = 0;
vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
@@ -1311,6 +1317,9 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
update_exception_bitmap(vcpu);
+ if (emulate_invalid_guest_state)
+ return;
+
fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
@@ -1351,7 +1360,9 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
static void enter_rmode(struct kvm_vcpu *vcpu)
{
unsigned long flags;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ vmx->emulation_required = 1;
vcpu->arch.rmode.active = 1;
vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
@@ -1373,6 +1384,9 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
update_exception_bitmap(vcpu);
+ if (emulate_invalid_guest_state)
+ goto continue_rmode;
+
vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
vmcs_write32(GUEST_SS_LIMIT, 0xffff);
vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
@@ -1388,6 +1402,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+continue_rmode:
kvm_mmu_reset_context(vcpu);
init_rmode(vcpu->kvm);
}
@@ -1721,6 +1736,186 @@ static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
vmcs_writel(GUEST_GDTR_BASE, dt->base);
}
+/*
+ * A segment is usable for hardware real-mode-style execution when it
+ * looks exactly like a real-mode segment: base == selector << 4,
+ * a 64K limit, and the 0xf3 access-rights byte.
+ */
+static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
+{
+ struct kvm_segment seg_var;
+ u32 access;
+
+ vmx_get_segment(vcpu, &seg_var, seg);
+ access = vmx_segment_access_rights(&seg_var);
+
+ return (seg_var.base == (seg_var.selector << 4)) &&
+ (seg_var.limit == 0xffff) &&
+ (access == 0xf3);
+}
+
+/*
+ * VMX entry check for CS: must be a present, accessed code segment
+ * (S = 1), with DPL consistent with the selector's RPL.
+ */
+static bool code_segment_valid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment cs;
+ unsigned int rpl;
+
+ vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+ rpl = cs.selector & SELECTOR_RPL_MASK;
+
+ if (!cs.s || !cs.present)
+ return false;
+ /* type must have both the code and accessed bits set */
+ if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
+ return false;
+
+ if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) {
+ /* readable code segment: DPL may not exceed RPL */
+ if (cs.dpl > rpl)
+ return false;
+ } else if (cs.type & AR_TYPE_CODE_MASK) {
+ /* otherwise DPL must equal RPL */
+ if (cs.dpl != rpl)
+ return false;
+ }
+
+ /* TODO: Add Reserved field check, this'll require a new member in the kvm_segment_field structure */
+ return true;
+}
+
+/*
+ * VMX entry check for SS: a present, writable data segment whose DPL
+ * equals the selector's RPL.
+ */
+static bool stack_segment_valid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment ss;
+ unsigned int ss_rpl;
+
+ vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
+ ss_rpl = ss.selector & SELECTOR_RPL_MASK;
+
+ /*
+ * SS type must be 3 (read/write, accessed) or 7 (read/write,
+ * expand-down, accessed). The previous test used ||, which is
+ * true for every possible type value (nothing equals both 3 and
+ * 7), so the function always rejected the guest state.
+ */
+ if ((ss.type != 3) && (ss.type != 7))
+ return false;
+ if (!ss.s)
+ return false;
+ if (ss.dpl != ss_rpl) /* DPL != RPL */
+ return false;
+ if (!ss.present)
+ return false;
+
+ return true;
+}
+
+/*
+ * VMX entry check for a data segment register (DS/ES/FS/GS): present,
+ * S = 1, and for non-conforming types DPL must be at least the RPL.
+ */
+static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
+{
+ struct kvm_segment seg_var;
+ unsigned int seg_rpl;
+
+ vmx_get_segment(vcpu, &seg_var, seg);
+ seg_rpl = seg_var.selector & SELECTOR_RPL_MASK;
+
+ if (!seg_var.s || !seg_var.present)
+ return false;
+ if ((~seg_var.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK)) &&
+ (seg_var.dpl < seg_rpl)) /* DPL < RPL */
+ return false;
+
+ /* TODO: Add other members to kvm_segment_field to allow checking for other access
+ * rights flags
+ */
+ return true;
+}
+
+/*
+ * VMX entry check for TR: a present, busy TSS selected from the GDT.
+ */
+static bool tr_valid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment tr;
+
+ vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
+
+ if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */
+ return false;
+ /*
+ * TR type must be 3 (16-bit busy TSS) or 11 (32-bit busy TSS).
+ * The previous test used ||, which is true for every type value
+ * and so always rejected the guest state.
+ * TODO: Check if guest is in IA32e mode - only type 11 is valid there
+ */
+ if ((tr.type != 3) && (tr.type != 11))
+ return false;
+ if (!tr.present)
+ return false;
+
+ return true;
+}
+
+/*
+ * VMX entry check for LDTR: a present LDT descriptor (type 2) whose
+ * selector references the GDT (TI = 0).
+ */
+static bool ldtr_valid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment ldtr;
+
+ vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
+
+ return !(ldtr.selector & SELECTOR_TI_MASK) &&
+ (ldtr.type == 2) &&
+ ldtr.present;
+}
+
+/* The RPL fields of the CS and SS selectors must agree for VM entry. */
+static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment cs, ss;
+ unsigned int cs_rpl, ss_rpl;
+
+ vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+ vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
+
+ cs_rpl = cs.selector & SELECTOR_RPL_MASK;
+ ss_rpl = ss.selector & SELECTOR_RPL_MASK;
+
+ return cs_rpl == ss_rpl;
+}
+
+/*
+ * Check if guest state is valid for a hardware VM entry. Returns true
+ * if valid, false if emulation is still required.
+ * We assume that registers are always usable.
+ */
+static bool guest_state_valid(struct kvm_vcpu *vcpu)
+{
+ /* real mode guest state checks */
+ if (!(vcpu->arch.cr0 & X86_CR0_PE))
+ return rmode_segment_valid(vcpu, VCPU_SREG_CS) &&
+ rmode_segment_valid(vcpu, VCPU_SREG_SS) &&
+ rmode_segment_valid(vcpu, VCPU_SREG_DS) &&
+ rmode_segment_valid(vcpu, VCPU_SREG_ES) &&
+ rmode_segment_valid(vcpu, VCPU_SREG_FS) &&
+ rmode_segment_valid(vcpu, VCPU_SREG_GS);
+
+ /* protected mode guest state checks */
+ /* TODO:
+ * - Add checks on RIP
+ * - Add checks on RFLAGS
+ */
+ return cs_ss_rpl_check(vcpu) &&
+ code_segment_valid(vcpu) &&
+ stack_segment_valid(vcpu) &&
+ data_segment_valid(vcpu, VCPU_SREG_DS) &&
+ data_segment_valid(vcpu, VCPU_SREG_ES) &&
+ data_segment_valid(vcpu, VCPU_SREG_FS) &&
+ data_segment_valid(vcpu, VCPU_SREG_GS) &&
+ tr_valid(vcpu) &&
+ ldtr_valid(vcpu);
+}
+
static int init_rmode_tss(struct kvm *kvm)
{
gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
@@ -2132,6 +2327,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
ret = 0;
+ /* HACK: Don't enable emulation on guest boot/reset */
+ vmx->emulation_required = 0;
+
out:
up_read(&vcpu->kvm->slots_lock);
return ret;
@@ -2708,6 +2906,37 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
+/*
+ * Emulate guest instructions one at a time until the guest state
+ * becomes acceptable for a hardware VM entry. Called from
+ * vmx_vcpu_run() in place of actually entering the guest.
+ */
+static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
+ struct kvm_run *kvm_run)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int err;
+
+ while (!guest_state_valid(vcpu)) {
+ err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
+
+ switch (err) {
+ case EMULATE_DONE:
+ break;
+ case EMULATE_DO_MMIO:
+ /* MMIO completion via userspace is not wired up here yet */
+ kvm_report_emulation_failure(vcpu, "mmio");
+ /* TODO: Handle MMIO */
+ return;
+ default:
+ kvm_report_emulation_failure(vcpu, "emulation failure");
+ return;
+ }
+
+ /* stay responsive: honour signals and give up the CPU if needed */
+ if (signal_pending(current))
+ break;
+ if (need_resched())
+ schedule();
+ }
+
+ /* Guest state should be valid now, no more emulation should be needed */
+ /* NOTE(review): the flag is also cleared when the loop was broken by
+ * signal_pending() above, i.e. while the state may still be invalid -
+ * confirm the caller re-validates before the next VM entry. */
+ vmx->emulation_required = 0;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -2969,6 +3198,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info;
+ /* Handle invalid guest state instead of entering VMX */
+ if (vmx->emulation_required && emulate_invalid_guest_state) {
+ handle_invalid_guest_state(vcpu, kvm_run);
+ return;
+ }
+
if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [RFC][PATCH v2] VMX: Invalid guest state emulation
2008-08-14 17:56 ` Mohammed Gamal
@ 2008-08-17 7:38 ` Avi Kivity
2008-08-17 10:00 ` Mohammed Gamal
0 siblings, 1 reply; 6+ messages in thread
From: Avi Kivity @ 2008-08-17 7:38 UTC (permalink / raw)
To: Mohammed Gamal; +Cc: kvm, riel, andrea, guillaume.thouvenin, laurent.vivier
Mohammed Gamal wrote:
> [Updated to take coding style and emulation loop issues in consideration]
>
>
Applied, thanks.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC][PATCH v2] VMX: Invalid guest state emulation
2008-08-17 7:38 ` Avi Kivity
@ 2008-08-17 10:00 ` Mohammed Gamal
2008-08-17 12:03 ` Avi Kivity
0 siblings, 1 reply; 6+ messages in thread
From: Mohammed Gamal @ 2008-08-17 10:00 UTC (permalink / raw)
To: Avi Kivity; +Cc: kvm, riel, andrea, guillaume.thouvenin, laurent.vivier
On Sun, Aug 17, 2008 at 10:38 AM, Avi Kivity <avi@qumranet.com> wrote:
> Mohammed Gamal wrote:
>>
>> [Updated to take coding style and emulation loop issues in consideration]
>>
>>
>
> Applied, thanks.
>
Would it be a problem if I re-send this patch as a patch series of
smaller chunks?
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC][PATCH v2] VMX: Invalid guest state emulation
2008-08-17 10:00 ` Mohammed Gamal
@ 2008-08-17 12:03 ` Avi Kivity
0 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2008-08-17 12:03 UTC (permalink / raw)
To: Mohammed Gamal; +Cc: kvm, riel, andrea, guillaume.thouvenin, laurent.vivier
Mohammed Gamal wrote:
> On Sun, Aug 17, 2008 at 10:38 AM, Avi Kivity <avi@qumranet.com> wrote:
>
>> Mohammed Gamal wrote:
>>
>>> [Updated to take coding style and emulation loop issues in consideration]
>>>
>>>
>>>
>> Applied, thanks.
>>
>>
>
> Would it be a problem if I re-send this patch as a patch series of
> smaller chunks?
>
I can revert it easily as I hadn't pushed it out yet, so no problem.
--
Do not meddle in the internals of kernels, for they are subtle and quick to panic.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2008-08-17 12:03 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-13 23:58 [RFC][PATCH v2] VMX: Invalid guest state emulation Mohammed Gamal
2008-08-14 7:26 ` Avi Kivity
2008-08-14 17:56 ` Mohammed Gamal
2008-08-17 7:38 ` Avi Kivity
2008-08-17 10:00 ` Mohammed Gamal
2008-08-17 12:03 ` Avi Kivity
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox