public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH] VMX: Invalid guest state emulation
@ 2008-08-03  2:08 Mohammed Gamal
  2008-08-03 13:26 ` Mohammed Gamal
  2008-08-10  8:09 ` Avi Kivity
  0 siblings, 2 replies; 7+ messages in thread
From: Mohammed Gamal @ 2008-08-03  2:08 UTC (permalink / raw)
  To: kvm; +Cc: avi, riel, andrea, guillaume.thouvenin

This patch aims to allow emulation whenever guest state is not valid for VMX operation. This usually happens in mode switches with guests such as 
older versions of gfxboot and FreeDOS with HIMEM. 

The patch addresses this issue; it introduces the following:

- A function that invokes the x86 emulator when the guest state is not valid (borrowed from Guillaume Thouvenin's real mode patches)
- A function that checks that guest register state is VMX compliant
- A module parameter that enables these operations. It is disabled by default, in order not to interfere with KVM's normal operation

---

 arch/x86/kvm/vmx.c |  234 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 225 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c4510fe..61da1e3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1,4 +1,4 @@
 /*
  * Kernel-based Virtual Machine driver for Linux
  *
  * This module enables machines with Intel VT-x extensions to run virtual
@@ -49,6 +49,9 @@ module_param(flexpriority_enabled, bool, 0);
 static int enable_ept = 1;
 module_param(enable_ept, bool, 0);
 
+static int emulate_invalid_guest_state = 0;
+module_param(emulate_invalid_guest_state, bool, 0);
+
 struct vmcs {
 	u32 revision_id;
 	u32 abort;
@@ -95,6 +98,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 
 static int init_rmode(struct kvm *kvm);
 static u64 construct_eptp(unsigned long root_hpa);
+static int invalid_guest_state_handler(struct kvm_vcpu *vcpu);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1275,6 +1279,177 @@ static __exit void hardware_unsetup(void)
 	free_kvm_area();
 }
 
+/*
+ * Check whether guest register state is valid for VMX non-root
+ * operation. Returns true if valid, false if not.
+ * We assume that all segment registers are usable.
+ */
+static bool guest_state_valid(struct kvm_vcpu *vcpu)
+{
+	u16 cs, ds, ss, es, fs, gs, tr, ldtr;
+	u64 cs_limit, ds_limit, ss_limit, es_limit, fs_limit, gs_limit;
+	u64 tr_limit, ldtr_limit;
+	u32 cs_ar, ds_ar, ss_ar, es_ar, fs_ar, gs_ar, tr_ar, ldtr_ar;
+
+	cs = vmcs_read16(GUEST_CS_SELECTOR);
+	ds = vmcs_read16(GUEST_DS_SELECTOR);
+	ss = vmcs_read16(GUEST_SS_SELECTOR);
+	es = vmcs_read16(GUEST_ES_SELECTOR);
+	fs = vmcs_read16(GUEST_FS_SELECTOR);
+	gs = vmcs_read16(GUEST_GS_SELECTOR);
+	tr = vmcs_read16(GUEST_TR_SELECTOR);
+	ldtr = vmcs_read16(GUEST_LDTR_SELECTOR);
+
+	cs_limit = vmcs_readl(GUEST_CS_LIMIT);
+	ds_limit = vmcs_readl(GUEST_DS_LIMIT);
+	ss_limit = vmcs_readl(GUEST_SS_LIMIT);
+	es_limit = vmcs_readl(GUEST_ES_LIMIT);
+	fs_limit = vmcs_readl(GUEST_FS_LIMIT);
+	gs_limit = vmcs_readl(GUEST_GS_LIMIT);
+	tr_limit = vmcs_readl(GUEST_TR_LIMIT);
+	ldtr_limit = vmcs_readl(GUEST_LDTR_LIMIT);
+
+	cs_ar = vmcs_read32(GUEST_CS_AR_BYTES);
+	ds_ar = vmcs_read32(GUEST_DS_AR_BYTES);
+	ss_ar = vmcs_read32(GUEST_SS_AR_BYTES);
+	es_ar = vmcs_read32(GUEST_ES_AR_BYTES);
+	fs_ar = vmcs_read32(GUEST_FS_AR_BYTES);
+	gs_ar = vmcs_read32(GUEST_GS_AR_BYTES);
+	tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
+	ldtr_ar = vmcs_read32(GUEST_LDTR_AR_BYTES);
+
+	/* TI is bit 2 of a selector; TR and LDTR must reference the GDT */
+	if (tr & 0x04)
+		return false;
+	if (ldtr & 0x04)
+		return false;
+
+	/* vm86 mode guest state checks */
+	if (vcpu->arch.rmode.active) {
+		/* Segment limits must all be 0xffff */
+		if ((cs_limit != 0xffff) || (ds_limit != 0xffff) ||
+		    (ss_limit != 0xffff) || (es_limit != 0xffff) ||
+		    (fs_limit != 0xffff) || (gs_limit != 0xffff))
+			return false;
+
+		/* Access rights must be 0xf3 (present, DPL 3, r/w data) */
+		if ((cs_ar != 0xf3) || (ds_ar != 0xf3) || (ss_ar != 0xf3) ||
+		    (es_ar != 0xf3) || (fs_ar != 0xf3) || (gs_ar != 0xf3))
+			return false;
+	} else { /* protected mode guest state checks */
+		/* SS RPL bits must equal CS RPL bits */
+		if ((cs & 0x03) != (ss & 0x03))
+			return false;
+
+		/* CS must be an accessed code segment */
+		if ((cs_ar & (AR_TYPE_ACCESSES_MASK | AR_TYPE_CODE_MASK)) !=
+		    (AR_TYPE_ACCESSES_MASK | AR_TYPE_CODE_MASK))
+			return false;
+		/* SS must be a read/write data segment (type 3 or 7) */
+		if (((ss_ar & AR_TYPE_MASK) != 3) && ((ss_ar & AR_TYPE_MASK) != 7))
+			return false;
+		/* DS/ES/FS/GS must all have the accessed bit set */
+		if (!(ds_ar & AR_TYPE_ACCESSES_MASK) || !(es_ar & AR_TYPE_ACCESSES_MASK) ||
+		    !(fs_ar & AR_TYPE_ACCESSES_MASK) || !(gs_ar & AR_TYPE_ACCESSES_MASK))
+			return false;
+		/* Any of DS/ES/FS/GS that is a code segment must be readable */
+		if (((ds_ar & AR_TYPE_CODE_MASK) && !(ds_ar & AR_TYPE_READABLE_MASK)) ||
+		    ((es_ar & AR_TYPE_CODE_MASK) && !(es_ar & AR_TYPE_READABLE_MASK)) ||
+		    ((fs_ar & AR_TYPE_CODE_MASK) && !(fs_ar & AR_TYPE_READABLE_MASK)) ||
+		    ((gs_ar & AR_TYPE_CODE_MASK) && !(gs_ar & AR_TYPE_READABLE_MASK)))
+			return false;
+
+		/* S (descriptor type) must be set for all segments */
+		if (!(cs_ar & AR_S_MASK) || !(ds_ar & AR_S_MASK) || !(ss_ar & AR_S_MASK) ||
+		    !(es_ar & AR_S_MASK) || !(fs_ar & AR_S_MASK) || !(gs_ar & AR_S_MASK))
+			return false;
+
+		/* DPL field checks */
+		if (((cs_ar & AR_TYPE_MASK) >= 0x8) && ((cs_ar & AR_TYPE_MASK) <= 0xb)) {
+			/* non-conforming code: DPL must equal CS RPL */
+			if (AR_DPL(cs_ar) != (cs & 0x03))
+				return false;
+		} else if (((cs_ar & AR_TYPE_MASK) >= 0xc) && ((cs_ar & AR_TYPE_MASK) <= 0xf)) {
+			/* conforming code: DPL cannot exceed CS RPL */
+			if (AR_DPL(cs_ar) > (cs & 0x03))
+				return false;
+		}
+
+		if (AR_DPL(ss_ar) != (ss & 0x03))
+			return false;
+
+		/* data and non-conforming code segments: DPL >= RPL */
+		if ((ds_ar & AR_TYPE_MASK) <= 0xb)
+			if (AR_DPL(ds_ar) < (ds & 0x03))
+				return false;
+		if ((es_ar & AR_TYPE_MASK) <= 0xb)
+			if (AR_DPL(es_ar) < (es & 0x03))
+				return false;
+		if ((fs_ar & AR_TYPE_MASK) <= 0xb)
+			if (AR_DPL(fs_ar) < (fs & 0x03))
+				return false;
+		if ((gs_ar & AR_TYPE_MASK) <= 0xb)
+			if (AR_DPL(gs_ar) < (gs & 0x03))
+				return false;
+
+		/* P (present) must be set for all segments */
+		if (!(cs_ar & AR_P_MASK) || !(ds_ar & AR_P_MASK) || !(ss_ar & AR_P_MASK) ||
+		    !(es_ar & AR_P_MASK) || !(fs_ar & AR_P_MASK) || !(gs_ar & AR_P_MASK))
+			return false;
+
+		/* Reserved fields must be zero */
+		if ((cs_ar & AR_RESERVD_MASK) || (ds_ar & AR_RESERVD_MASK) ||
+		    (ss_ar & AR_RESERVD_MASK) || (es_ar & AR_RESERVD_MASK) ||
+		    (fs_ar & AR_RESERVD_MASK) || (gs_ar & AR_RESERVD_MASK))
+			return false;
+
+		/* TODO:
+		 * - Add checks on G and D/B fields
+		 * - Add checks on the unusable mask
+		 */
+	}
+
+	/* TR access rights bits checks */
+	if (tr_ar & AR_S_MASK)
+		return false;
+	if (!(tr_ar & AR_P_MASK))
+		return false;
+	if (tr_ar & AR_RESERVD_MASK)
+		return false;
+	if (tr_ar & AR_UNUSABLE_MASK)
+		return false;
+	if ((tr_limit & 0x00000fff) != 0x00000fff) {
+		if (tr_ar & AR_G_MASK)
+			return false;
+	} else if (tr_limit & 0xffff0000) {
+		if (!(tr_ar & AR_G_MASK))
+			return false;
+	}
+
+	/* LDTR access rights bits checks */
+	if ((ldtr_ar & AR_TYPE_MASK) != AR_TYPE_LDT)
+		return false;
+	if (ldtr_ar & AR_S_MASK)
+		return false;
+	if (!(ldtr_ar & AR_P_MASK))
+		return false;
+	if (ldtr_ar & AR_RESERVD_MASK)
+		return false;
+	if ((ldtr_limit & 0x00000fff) != 0x00000fff) {
+		if (ldtr_ar & AR_G_MASK)
+			return false;
+	} else if (ldtr_limit & 0xffff0000) {
+		if (!(ldtr_ar & AR_G_MASK))
+			return false;
+	}
+
+	/* TODO:
+	 * - Add checks on RIP
+	 * - Add checks on RFLAGS
+	 */
+	return true;
+}
+
 static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1311,10 +1486,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
 	update_exception_bitmap(vcpu);
 
-	fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
-	fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
-	fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
-	fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+	if(!emulate_invalid_guest_state) {
+		fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
+		fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
+		fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
+		fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+	}
 
 	vmcs_write16(GUEST_SS_SELECTOR, 0);
 	vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -1322,6 +1499,13 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	vmcs_write16(GUEST_CS_SELECTOR,
 		     vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
 	vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+
+	if(emulate_invalid_guest_state) {
+		while(!guest_state_valid(vcpu)) {
+			if(!invalid_guest_state_handler(vcpu))
+				break;
+		}
+	}
 }
 
 static gva_t rmode_tss_base(struct kvm *kvm)
@@ -1383,13 +1567,22 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 		vmcs_writel(GUEST_CS_BASE, 0xf0000);
 	vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
 
-	fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
-	fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
-	fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
-	fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+	if(!emulate_invalid_guest_state) {
+		fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
+		fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
+		fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
+		fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+	}
 
 	kvm_mmu_reset_context(vcpu);
 	init_rmode(vcpu->kvm);
+	
+	if(emulate_invalid_guest_state) {
+		while(!guest_state_valid(vcpu)) {
+			if(!invalid_guest_state_handler(vcpu))
+				break;
+		}
+	}
 }
 
 #ifdef CONFIG_X86_64
@@ -2708,6 +2901,29 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 1;
 }
 
+/*
+ * Emulate one instruction while the guest state is not VMX-compliant.
+ * Returns 1 on success (caller may keep emulating), 0 on failure.
+ */
+static int invalid_guest_state_handler(struct kvm_vcpu *vcpu)
+{
+	int err;
+
+	/* Emulate at the current RIP; no run struct or prefetched bytes */
+	err = emulate_instruction(vcpu, NULL, 0, 0, 0);
+
+	switch (err) {
+	case EMULATE_DONE:
+		return 1;
+	case EMULATE_DO_MMIO:
+		printk(KERN_INFO "mmio?\n");
+		return 0;
+	default:
+		kvm_report_emulation_failure(vcpu, "vmentry failure");
+		return 0;
+	}
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2008-08-11  8:53 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-08-03  2:08 [RFC][PATCH] VMX: Invalid guest state emulation Mohammed Gamal
2008-08-03 13:26 ` Mohammed Gamal
2008-08-04  8:48   ` Guillaume Thouvenin
2008-08-04 10:46     ` Mohammed Gamal
2008-08-10  8:09 ` Avi Kivity
2008-08-10 18:45   ` Mohammed Gamal
2008-08-11  8:53     ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox