From: Mohammed Gamal <m.gamal005@gmail.com>
To: kvm@vger.kernel.org
Cc: avi@qumranet.com, riel@surriel.com, andrea@qumranet.com,
guillaume.thouvenin@ext.bull.net, laurent.vivier@bull.net
Subject: Re: [RFC][PATCH] VMX: Invalid guest state emulation
Date: Sun, 3 Aug 2008 16:26:01 +0300 [thread overview]
Message-ID: <20080803132601.GA9575@mohd-laptop> (raw)
In-Reply-To: <20080803020826.GA20831@mohd-laptop>
[This resend adds the sign-offs and fixes a typo that was in the previous patch]
This patch aims to allow emulation whenever guest state is not valid for VMX operation, which occurs while trying to emulate big real mode on guests
such as older versions of gfxboot and FreeDOS with HIMEM.
To address this issue, the patch introduces the following:
- A function that invokes the x86 emulator when the guest state is not valid (borrowed from Guillaume Thouvenin's real mode patches)
- A function that checks that guest register state is VMX compliant
- A module parameter that enables these operations. It is disabled by default, in order not to interfere with KVM's normal operation
The next step needed in order to correctly emulate real mode would be to add more instructions in the x86 emulator whenever needed.
Signed-off-by: Laurent Vivier <laurent.vivier@bull.net>
Signed-off-by: Guillaume Thouvenin <guillaume.thouvenin@ext.bull.net>
Signed-off-by: Mohammed Gamal <m.gamal005@gmail.com>
---
arch/x86/kvm/vmx.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 224 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c4510fe..2b5dd68 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -49,6 +49,9 @@ module_param(flexpriority_enabled, bool, 0);
static int enable_ept = 1;
module_param(enable_ept, bool, 0);
+static int emulate_invalid_guest_state = 0;
+module_param(emulate_invalid_guest_state, bool, 0);
+
struct vmcs {
u32 revision_id;
u32 abort;
@@ -95,6 +98,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
static int init_rmode(struct kvm *kvm);
static u64 construct_eptp(unsigned long root_hpa);
+static int invalid_guest_state_handler(struct kvm_vcpu *vcpu);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -1275,6 +1279,177 @@ static __exit void hardware_unsetup(void)
free_kvm_area();
}
+/*
+ * Check whether the guest register state is valid for direct VMX
+ * execution, per the VM-entry guest-state checks in the Intel SDM.
+ * Returns true if valid, false if not.
+ * We assume that registers are always usable.
+ */
+static bool guest_state_valid(struct kvm_vcpu *vcpu)
+{
+	u16 cs, ds, ss, es, fs, gs, tr, ldtr;
+	u64 cs_limit, ds_limit, ss_limit, es_limit, fs_limit, gs_limit;
+	u64 tr_limit, ldtr_limit;
+	u32 cs_ar, ds_ar, ss_ar, es_ar, fs_ar, gs_ar, tr_ar, ldtr_ar;
+
+	cs = vmcs_read16(GUEST_CS_SELECTOR);
+	ds = vmcs_read16(GUEST_DS_SELECTOR);
+	ss = vmcs_read16(GUEST_SS_SELECTOR);
+	es = vmcs_read16(GUEST_ES_SELECTOR);
+	fs = vmcs_read16(GUEST_FS_SELECTOR);
+	gs = vmcs_read16(GUEST_GS_SELECTOR);
+	tr = vmcs_read16(GUEST_TR_SELECTOR);
+	ldtr = vmcs_read16(GUEST_LDTR_SELECTOR);
+
+	cs_limit = vmcs_readl(GUEST_CS_LIMIT);
+	ds_limit = vmcs_readl(GUEST_DS_LIMIT);
+	ss_limit = vmcs_readl(GUEST_SS_LIMIT);
+	es_limit = vmcs_readl(GUEST_ES_LIMIT);
+	fs_limit = vmcs_readl(GUEST_FS_LIMIT);
+	gs_limit = vmcs_readl(GUEST_GS_LIMIT);
+	tr_limit = vmcs_readl(GUEST_TR_LIMIT);
+	ldtr_limit = vmcs_readl(GUEST_LDTR_LIMIT);
+
+	cs_ar = vmcs_read32(GUEST_CS_AR_BYTES);
+	ds_ar = vmcs_read32(GUEST_DS_AR_BYTES);
+	ss_ar = vmcs_read32(GUEST_SS_AR_BYTES);
+	es_ar = vmcs_read32(GUEST_ES_AR_BYTES);
+	fs_ar = vmcs_read32(GUEST_FS_AR_BYTES);
+	gs_ar = vmcs_read32(GUEST_GS_AR_BYTES);
+	tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
+	ldtr_ar = vmcs_read32(GUEST_LDTR_AR_BYTES);
+
+	/* The TI flag is bit 2 of a segment selector, i.e. 0x04 */
+	if (tr & 0x04)		/* TI = 1 */
+		return false;
+
+	if (ldtr & 0x04)	/* TI = 1 */
+		return false;
+
+	/* vm86 mode guest state checks */
+	if (vcpu->arch.rmode.active) {
+		/* Check segment limits */
+		if (cs_limit != 0xffff || ds_limit != 0xffff ||
+		    ss_limit != 0xffff || es_limit != 0xffff ||
+		    fs_limit != 0xffff || gs_limit != 0xffff)
+			return false;
+
+		/* Check access rights */
+		if (cs_ar != 0xf3 || ds_ar != 0xf3 || ss_ar != 0xf3 ||
+		    es_ar != 0xf3 || fs_ar != 0xf3 || gs_ar != 0xf3)
+			return false;
+	} else { /* protected mode guest state checks */
+
+		/* SS RPL bits must equal CS RPL bits */
+		if ((cs & 0x03) != (ss & 0x03))
+			return false;
+
+		/* Begin access rights bits check */
+
+		/* CS must be an accessed code segment */
+		if (!(cs_ar & AR_TYPE_ACCESSES_MASK) ||
+		    !(cs_ar & AR_TYPE_CODE_MASK))
+			return false;
+		/* SS type must be 3 (r/w data) or 7 (r/w expand-down data) */
+		if ((ss_ar & AR_TYPE_MASK) != 3 && (ss_ar & AR_TYPE_MASK) != 7)
+			return false;
+		/* Each data segment must have its accessed bit set */
+		if (!(ds_ar & AR_TYPE_ACCESSES_MASK) ||
+		    !(es_ar & AR_TYPE_ACCESSES_MASK) ||
+		    !(fs_ar & AR_TYPE_ACCESSES_MASK) ||
+		    !(gs_ar & AR_TYPE_ACCESSES_MASK))
+			return false;
+		/* A data segment register holding a code segment must be readable */
+		if (((ds_ar & AR_TYPE_CODE_MASK) && !(ds_ar & AR_TYPE_READABLE_MASK)) ||
+		    ((es_ar & AR_TYPE_CODE_MASK) && !(es_ar & AR_TYPE_READABLE_MASK)) ||
+		    ((fs_ar & AR_TYPE_CODE_MASK) && !(fs_ar & AR_TYPE_READABLE_MASK)) ||
+		    ((gs_ar & AR_TYPE_CODE_MASK) && !(gs_ar & AR_TYPE_READABLE_MASK)))
+			return false;
+
+		/* S field: every segment must be a code/data segment */
+		if (!(cs_ar & AR_S_MASK) || !(ds_ar & AR_S_MASK) ||
+		    !(ss_ar & AR_S_MASK) || !(es_ar & AR_S_MASK) ||
+		    !(fs_ar & AR_S_MASK) || !(gs_ar & AR_S_MASK))
+			return false;
+
+		/* DPL field checks */
+		if ((cs_ar & AR_TYPE_MASK) >= 0x8 && (cs_ar & AR_TYPE_MASK) <= 0xb) {
+			/* non-conforming code segment: DPL must equal RPL */
+			if (AR_DPL(cs_ar) != (cs & 0x03))
+				return false;
+		} else if ((cs_ar & AR_TYPE_MASK) >= 0xc && (cs_ar & AR_TYPE_MASK) <= 0xf) {
+			/* conforming code segment: DPL must not exceed RPL */
+			if (AR_DPL(cs_ar) > (cs & 0x03))
+				return false;
+		}
+
+		if (AR_DPL(ss_ar) != (ss & 0x03))
+			return false;
+
+		/* data/non-conforming segments: DPL must be >= RPL */
+		if ((ds_ar & AR_TYPE_MASK) <= 0xb &&
+		    AR_DPL(ds_ar) < (ds & 0x03))
+			return false;
+		if ((es_ar & AR_TYPE_MASK) <= 0xb &&
+		    AR_DPL(es_ar) < (es & 0x03))
+			return false;
+		if ((fs_ar & AR_TYPE_MASK) <= 0xb &&
+		    AR_DPL(fs_ar) < (fs & 0x03))
+			return false;
+		if ((gs_ar & AR_TYPE_MASK) <= 0xb &&
+		    AR_DPL(gs_ar) < (gs & 0x03))
+			return false;
+
+		/* P field: every segment must be present */
+		if (!(cs_ar & AR_P_MASK) || !(ds_ar & AR_P_MASK) ||
+		    !(ss_ar & AR_P_MASK) || !(es_ar & AR_P_MASK) ||
+		    !(fs_ar & AR_P_MASK) || !(gs_ar & AR_P_MASK))
+			return false;
+
+		/* Reserved fields check */
+		if ((cs_ar & AR_RESERVD_MASK) || (ds_ar & AR_RESERVD_MASK) ||
+		    (ss_ar & AR_RESERVD_MASK) || (es_ar & AR_RESERVD_MASK) ||
+		    (fs_ar & AR_RESERVD_MASK) || (gs_ar & AR_RESERVD_MASK))
+			return false;
+
+		/* TODO:
+		 * - Add checks on G and D/B fields
+		 * - Add checks on the unusable mask
+		 */
+	}
+
+	/* TR access rights bits checks */
+	if (tr_ar & AR_S_MASK)
+		return false;
+	if (!(tr_ar & AR_P_MASK))	/* P is bit 7, so test the bit, not == 1 */
+		return false;
+	if (tr_ar & AR_RESERVD_MASK)
+		return false;
+	if (tr_ar & AR_UNUSABLE_MASK)
+		return false;
+	if ((tr_limit & 0x00000fff) != 0x00000fff) {
+		/* limit with low 12 bits not all set requires G = 0 */
+		if (tr_ar & AR_G_MASK)
+			return false;
+	} else if (tr_limit & 0xffff0000) {
+		/* limit above 64K requires G = 1 */
+		if (!(tr_ar & AR_G_MASK))
+			return false;
+	}
+
+	/* LDTR access right bits checks */
+	if ((ldtr_ar & AR_TYPE_MASK) != AR_TYPE_LDT)
+		return false;
+	if (ldtr_ar & AR_S_MASK)
+		return false;
+	if (!(ldtr_ar & AR_P_MASK))
+		return false;
+	if (ldtr_ar & AR_RESERVD_MASK)
+		return false;
+	if ((ldtr_limit & 0x00000fff) != 0x00000fff) {
+		if (ldtr_ar & AR_G_MASK)
+			return false;
+	} else if (ldtr_limit & 0xffff0000) {
+		/* limit above 64K requires G = 1 */
+		if (!(ldtr_ar & AR_G_MASK))
+			return false;
+	}
+
+	/* TODO:
+	 * - Add checks on RIP
+	 * - Add checks on RFLAGS
+	 */
+
+	return true;
+}
+
static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
{
struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1311,10 +1486,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
update_exception_bitmap(vcpu);
- fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
- fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
- fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
- fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+ if(!emulate_invalid_guest_state) {
+ fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
+ fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
+ fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
+ fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+ }
vmcs_write16(GUEST_SS_SELECTOR, 0);
vmcs_write32(GUEST_SS_AR_BYTES, 0x93);
@@ -1322,6 +1499,13 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
vmcs_write16(GUEST_CS_SELECTOR,
vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK);
vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
+
+ if(emulate_invalid_guest_state) {
+ while(!guest_state_valid(vcpu)) {
+ if(!invalid_guest_state_handler(vcpu))
+ break;
+ }
+ }
}
static gva_t rmode_tss_base(struct kvm *kvm)
@@ -1383,13 +1567,22 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
vmcs_writel(GUEST_CS_BASE, 0xf0000);
vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
- fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
- fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
- fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
- fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+ if(!emulate_invalid_guest_state) {
+ fix_rmode_seg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
+ fix_rmode_seg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
+ fix_rmode_seg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
+ fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+ }
kvm_mmu_reset_context(vcpu);
init_rmode(vcpu->kvm);
+
+ if(emulate_invalid_guest_state) {
+ while(!guest_state_valid(vcpu)) {
+ if(!invalid_guest_state_handler(vcpu))
+ break;
+ }
+ }
}
#ifdef CONFIG_X86_64
@@ -2708,6 +2901,29 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
+/*
+ * Emulate a single instruction because the current guest state is not
+ * valid for direct VMX execution.  Returns 1 when emulation succeeded
+ * (the caller may retry until the state becomes valid), 0 when no
+ * further progress can be made.
+ */
+static int invalid_guest_state_handler(struct kvm_vcpu *vcpu)
+{
+	int err;
+
+	/* The previous opcode prefetch via emulator_read_std() was dead
+	 * code: emulate_instruction() fetches the opcode bytes itself. */
+	err = emulate_instruction(vcpu, NULL, 0, 0, 0);
+
+	switch (err) {
+	case EMULATE_DONE:
+		return 1;
+	case EMULATE_DO_MMIO:
+		/* MMIO exits are not handled from this path yet */
+		printk(KERN_INFO "mmio?\n");
+		return 0;
+	default:
+		kvm_report_emulation_failure(vcpu, "vmentry failure");
+		return 0;
+	}
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
next prev parent reply other threads:[~2008-08-03 13:26 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-08-03 2:08 [RFC][PATCH] VMX: Invalid guest state emulation Mohammed Gamal
2008-08-03 13:26 ` Mohammed Gamal [this message]
2008-08-04 8:48 ` Guillaume Thouvenin
2008-08-04 10:46 ` Mohammed Gamal
2008-08-10 8:09 ` Avi Kivity
2008-08-10 18:45 ` Mohammed Gamal
2008-08-11 8:53 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080803132601.GA9575@mohd-laptop \
--to=m.gamal005@gmail.com \
--cc=andrea@qumranet.com \
--cc=avi@qumranet.com \
--cc=guillaume.thouvenin@ext.bull.net \
--cc=kvm@vger.kernel.org \
--cc=laurent.vivier@bull.net \
--cc=riel@surriel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox