From: Avi Kivity <avi@qumranet.com>
To: Mohammed Gamal <m.gamal005@gmail.com>
Cc: kvm@vger.kernel.org, riel@surriel.com, andrea@qumranet.com,
guillaume.thouvenin@ext.bull.net, laurent.vivier@bull.net
Subject: Re: [RFC][PATCH v2] VMX: Invalid guest state emulation
Date: Thu, 14 Aug 2008 10:26:02 +0300 [thread overview]
Message-ID: <48A3DE0A.6040208@qumranet.com> (raw)
In-Reply-To: <20080813235222.GA9899@mohd-laptop>
Mohammed Gamal wrote:
> This patch aims to allow emulation whenever guest state is not valid for VMX operation.
> This usually happens in mode switches with guests such as older versions of gfxboot and FreeDOS with HIMEM.
>
> The patch aims to address this issue, it introduces the following:
>
> - A function that invokes the x86 emulator when the guest state is not valid (borrowed from Guillaume Thouvenin's real mode patches)
> - A function that checks that guest register state is VMX compliant
> - A module parameter that enables these operations. It is disabled by default, in order not to intervene with KVM's normal operation
>
> This version adds the following:
>
> - An "emulation required" flag, which is set on mode switches
> - Improved guest state checking functions
> - Emulation is done on guest entry, rather than directly on mode switching utilising the emulation flag.
>
> @@ -1294,7 +1298,9 @@ static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
> static void enter_pmode(struct kvm_vcpu *vcpu)
> {
> unsigned long flags;
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> + vmx->emulation_required = 1;
> vcpu->arch.rmode.active = 0;
>
> vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base);
> @@ -1311,17 +1317,19 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
>
> update_exception_bitmap(vcpu);
>
> - fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
> - fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
> - fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
> - fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
> + if(!emulate_invalid_guest_state) {
> + fix_pmode_dataseg(VCPU_SREG_ES, &vcpu->arch.rmode.es);
> + fix_pmode_dataseg(VCPU_SREG_DS, &vcpu->arch.rmode.ds);
> + fix_pmode_dataseg(VCPU_SREG_GS, &vcpu->arch.rmode.gs);
> + fix_pmode_dataseg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
>
Instead of all this indentation, you can do an 'if
(emulate_invalid_guest_state) return';
> @@ -1351,7 +1359,9 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
> static void enter_rmode(struct kvm_vcpu *vcpu)
> {
> unsigned long flags;
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> + vmx->emulation_required = 1;
> vcpu->arch.rmode.active = 1;
>
> vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
> @@ -1373,20 +1383,22 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
> vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME);
> update_exception_bitmap(vcpu);
>
> - vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
> - vmcs_write32(GUEST_SS_LIMIT, 0xffff);
> - vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
> + if(!emulate_invalid_guest_state) {
> + vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
> + vmcs_write32(GUEST_SS_LIMIT, 0xffff);
> + vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
>
Again, except you'll have to use a goto. We can later refactor the state
mangling into a separate function.
It's important to keep a complex change like this easy to review.
>
> kvm_mmu_reset_context(vcpu);
> init_rmode(vcpu->kvm);
> @@ -1721,6 +1733,206 @@ static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
> vmcs_writel(GUEST_GDTR_BASE, dt->base);
> }
>
> +static bool rmode_segment_invalid(struct kvm_vcpu *vcpu, int seg)
> +{
> + struct kvm_segment var;
> + u32 ar;
> +
> + vmx_get_segment(vcpu, &var, seg);
> + ar = vmx_segment_access_rights(&var);
> +
> + if (var.limit != 0xffff)
> + return true;
> + if (ar != 0xf3)
> + return true;
>
Need additional check for base == selector << 4.
> +
> +static bool code_segment_invalid(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_segment cs;
> +
> + vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
> +
> + if (!(cs.type & (AR_TYPE_ACCESSES_MASK|AR_TYPE_CODE_MASK)))
> + return true;
>
That doesn't mean what you think it means. It will fail only if both
bits are unset. To check if both bits are set, use (~cs.type &
(MASK1|MASK2)).
> + if ((cs.type <= 0xb) && (cs.type >= 0x8)) {
> + if (cs.dpl != (cs.selector & 3))
> + return true;
> + }
> + if ((cs.type <= 0xf) && (cs.type >= 0xc)) {
> + if (cs.dpl > (cs.selector & 3))
> + return true;
> + }
>
You're looking at the conforming bit, right? If so please use an
explicit bitmask. Also a temporary for the RPL will make the code clearer.
> + if((cs.limit & 0x00000fff) != 0x00000fff) {
> + if(cs.g)
> + return true;
> + }
> + else if(cs.limit & 0xfff00000) {
> + if(!cs.g)
> + return true;
> + }
>
This bit is generic, not cs specific. On the other hand there is no way
the guest can generate an invalid combination, so this can be removed.
> +static bool tr_segment_valid(struct kvm_vcpu *vcpu)
> +{
>
Should be tr_invalid().
I recommend changing the meaning of all the checks to return true on
valid segments, since that's how the specs are written. It will be
easier to validate the code against the docs, and avoids double negatives.
> +static bool ldtr_segment_valid(struct kvm_vcpu *vcpu)
>
invalid
> +{
> + struct kvm_segment ldtr;
> +
> + vmx_get_segment(vcpu, &ldtr, VCPU_SREG_TR);
>
VCPU_SREG_LDTR
> +/*
> + * Check if guest state is valid. Returns true if valid, false if
> + * not.
> + * We assume that registers are always usable
> + */
> +static bool guest_state_invalid(struct kvm_vcpu *vcpu)
> +{
> + /* real mode guest state checks */
> + if (!(vcpu->arch.cr0 & X86_CR0_PE)) {
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_CS))
> + return true;
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_SS))
> + return true;
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_DS))
> + return true;
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_ES))
> + return true;
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_FS))
> + return true;
> + if(rmode_segment_invalid(vcpu, VCPU_SREG_GS))
> + return true;
> + }
> + else { /* protected mode guest state checks */
> + if(code_segment_invalid(vcpu))
> + return true;
> + if(stack_segment_invalid(vcpu))
> + return true;
> + if(data_segment_invalid(vcpu, VCPU_SREG_DS))
> + return true;
> + if(data_segment_invalid(vcpu, VCPU_SREG_ES))
> + return true;
> + if(data_segment_invalid(vcpu, VCPU_SREG_FS))
> + return true;
> + if(data_segment_invalid(vcpu, VCPU_SREG_GS))
> + return true;
> + }
> +
> + if (tr_segment_valid(vcpu))
> + return true;
> + if (ldtr_segment_valid(vcpu))
> + return true;
>
In real mode we aren't interested in tr and ldtr. We provide fake ones
for v8086 mode, but they aren't related to the guest.
>
> +static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
> + struct kvm_run *kvm_run)
> +{
> + u8 opcodes[4];
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + unsigned long rip = kvm_rip_read(vcpu);
> + unsigned long rip_linear;
> + int err;
> +
> + while (guest_state_invalid(vcpu)) {
> + rip_linear = rip + vmx_get_segment_base(vcpu, VCPU_SREG_CS);
> + emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
>
unused.
> + err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
> +
> + switch (err) {
> + case EMULATE_DONE:
> + kvm_report_emulation_failure(vcpu, "emulation success");
>
looks redundant.
> + break;
> + case EMULATE_DO_MMIO:
> + kvm_report_emulation_failure(vcpu, "mmio");
> + /* TODO: Handle MMIO */
> + return;
> + default:
> + kvm_report_emulation_failure(vcpu, "emulation failure");
> + return;
> + }
> + }
> +
>
The loop is dangerous, if the guest is in an infinite loop we'll never
exit the kernel.
Either check for signals and need_reshched, or avoid the loop completely.
> + /* Guest state should be valid now, no more emulation should be needed */
> + vmx->emulation_required = 0;
> +}
> +
> /*
> * The exit handlers return 1 if the exit was handled fully and guest execution
> * may resume. Otherwise they set the kvm_run parameter to indicate what needs
> @@ -2969,131 +3214,137 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> u32 intr_info;
>
> - if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
> - vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
> - if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
> - vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
> + /* Handle invalid guest state instrad of entering VMX */
> + if (vmx->emulation_required && emulate_invalid_guest_state)
> + handle_invalid_guest_state(vcpu, kvm_run);
> +
> + else {
> + if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
> + vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
> + if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
> + vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
>
I'm very attached to vmx_cpu_run(). Please don't indent it like that.
Also, mind your coding style. 'if' is not a function, there should be a
space after it.
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
next prev parent reply other threads:[~2008-08-14 7:26 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-08-13 23:58 [RFC][PATCH v2] VMX: Invalid guest state emulation Mohammed Gamal
2008-08-14 7:26 ` Avi Kivity [this message]
2008-08-14 17:56 ` Mohammed Gamal
2008-08-17 7:38 ` Avi Kivity
2008-08-17 10:00 ` Mohammed Gamal
2008-08-17 12:03 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=48A3DE0A.6040208@qumranet.com \
--to=avi@qumranet.com \
--cc=andrea@qumranet.com \
--cc=guillaume.thouvenin@ext.bull.net \
--cc=kvm@vger.kernel.org \
--cc=laurent.vivier@bull.net \
--cc=m.gamal005@gmail.com \
--cc=riel@surriel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox