From mboxrd@z Thu Jan 1 00:00:00 1970 From: Avi Kivity Subject: Re: [PATCH 5/6] Nested VMX patch 5 implements vmlaunch and vmresume Date: Thu, 03 Sep 2009 00:38:16 +0300 Message-ID: <4A9EE5C8.8040107@redhat.com> References: <1251905916-2834-1-git-send-email-oritw@il.ibm.com> <1251905916-2834-2-git-send-email-oritw@il.ibm.com> <1251905916-2834-3-git-send-email-oritw@il.ibm.com> <1251905916-2834-4-git-send-email-oritw@il.ibm.com> <1251905916-2834-5-git-send-email-oritw@il.ibm.com> <1251905916-2834-6-git-send-email-oritw@il.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Cc: kvm@vger.kernel.org, benami@il.ibm.com, muli@il.ibm.com, abelg@il.ibm.com, aliguori@us.ibm.com, mmday@us.ibm.com To: oritw@il.ibm.com Return-path: Received: from mx1.redhat.com ([209.132.183.28]:55898 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751528AbZIBVh4 (ORCPT ); Wed, 2 Sep 2009 17:37:56 -0400 In-Reply-To: <1251905916-2834-6-git-send-email-oritw@il.ibm.com> Sender: kvm-owner@vger.kernel.org List-ID: On 09/02/2009 06:38 PM, oritw@il.ibm.com wrote: > -struct nested_vmx { > - /* Has the level1 guest done vmon? */ > +struct nested_vmx { /* Has the level1 guest done vmon? */ > A \n died here. > bool vmon; > /* Has the level1 guest done vmclear? */ > bool vmclear; > + > + /* Are we running nested guest */ > + bool nested_mode; > + > + /* L1 requested VMLAUNCH or VMRESUME but we didn't run L2 yet */ > + bool nested_run_pending; > + > + /* flag indicating if there was a valid IDT after exiting from l2 */ > + bool nested_pending_valid_idt; > What does this mean? pending event? > > + > +static inline int nested_cpu_has_vmx_tpr_shadow(struct kvm_vcpu *vcpu) > +{ > + return to_vmx(vcpu)->nested.l2_state->shadow_vmcs-> > + cpu_based_vm_exec_control& CPU_BASED_TPR_SHADOW; > +} > Don't we need to check if the host supports it too? > +static inline bool nested_vm_need_virtualize_apic_accesses(struct kvm_vcpu > + *vcpu) > +{ > + struct shadow_vmcs *shadow = to_vmx(vcpu)->nested.l2_state->shadow_vmcs; > + > + return (shadow->secondary_vm_exec_control& > + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)&& > + to_vmx(vcpu)->nested.l2_state->shadow_vmcs->apic_access_addr != 0; > +} > Why check apic_access_addr? > + > +static inline int nested_cpu_has_vmx_ept(struct kvm_vcpu *vcpu) > +{ > + return to_vmx(vcpu)->nested.l2_state->shadow_vmcs-> > + secondary_vm_exec_control& SECONDARY_EXEC_ENABLE_EPT; > +} > Need to check if secondary controls enabled? > +static void vmx_set_irq(struct kvm_vcpu *vcpu) > +{ > + if (to_vmx(vcpu)->nested.nested_mode) > + return; > Why? Note if the guest didn't enable external interrupt exiting, we need to inject as usual. > > +static int nested_handle_pending_idt(struct kvm_vcpu *vcpu) > +{ > Again the name is confusing. pending_event_injection? > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + int irq; > + int type; > + int errCodeValid; > + u32 idt_vectoring_info; > + u32 guest_intr; > + bool nmi_window_open; > + bool interrupt_window_open; > + > + if (vmx->nested.nested_mode&& vmx->nested.nested_pending_valid_idt) { > + idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); > + irq = idt_vectoring_info& VECTORING_INFO_VECTOR_MASK; > + type = idt_vectoring_info& VECTORING_INFO_TYPE_MASK; > + errCodeValid = idt_vectoring_info& > + VECTORING_INFO_DELIVER_CODE_MASK; > + > + guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); > + nmi_window_open = > + !(guest_intr& (GUEST_INTR_STATE_STI | > + GUEST_INTR_STATE_MOV_SS | > + GUEST_INTR_STATE_NMI)); > + > + interrupt_window_open = > + ((vmcs_readl(GUEST_RFLAGS)& X86_EFLAGS_IF)&& > + !(guest_intr& (GUEST_INTR_STATE_STI | > + GUEST_INTR_STATE_MOV_SS))); > + > + if (type == INTR_TYPE_EXT_INTR&& !interrupt_window_open) { > + printk(KERN_INFO "IDT ignored, l2 interrupt window closed!\n"); > + return 0; > + } > How can this happen? Unless it's on nested entry, in which case we need to abort the entry. > + > #ifdef CONFIG_X86_64 > #define R "r" > #define Q "q" > @@ -4646,6 +4842,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > > + nested_handle_pending_idt(vcpu); > You're not checking the return code (need to do that on entry). > + > + if (vmx->nested.nested_mode) { > + vmcs_writel(GUEST_CR0, vmx->nested.l2_state->shadow_vmcs->guest_cr0); > Might not be legal. We may also want to force-enable caching. Lastly, don't we need to handle cr0.ts and ct0.mp specially to manage the fpu state? > > + if (vmx->nested.nested_mode) > + vmx->nested.vmclear = 0; > + > Why? > free_vmcs: > @@ -5122,6 +5339,228 @@ static int shadow_vmcs_load(struct kvm_vcpu *vcpu) > return 0; > } > > +void prepare_vmcs_12(struct kvm_vcpu *vcpu) > +{ > + struct shadow_vmcs *l2_shadow_vmcs = > + to_vmx(vcpu)->nested.l2_state->shadow_vmcs; > + struct shadow_vmcs *l1_shadow_vmcs = > + to_vmx(vcpu)->nested.l1_state->shadow_vmcs; > + > + l2_shadow_vmcs->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR); > + l2_shadow_vmcs->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR); > + l2_shadow_vmcs->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR); > + l2_shadow_vmcs->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR); > + l2_shadow_vmcs->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR); > + l2_shadow_vmcs->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR); > + l2_shadow_vmcs->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR); > + l2_shadow_vmcs->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR); > + > + l1_shadow_vmcs->host_es_selector = vmcs_read16(HOST_ES_SELECTOR); > + l1_shadow_vmcs->host_cs_selector = vmcs_read16(HOST_CS_SELECTOR); > + l1_shadow_vmcs->host_ss_selector = vmcs_read16(HOST_SS_SELECTOR); > + l1_shadow_vmcs->host_ds_selector = vmcs_read16(HOST_DS_SELECTOR); > + l1_shadow_vmcs->host_fs_selector = vmcs_read16(HOST_FS_SELECTOR); > + l1_shadow_vmcs->host_gs_selector = vmcs_read16(HOST_GS_SELECTOR); > + l1_shadow_vmcs->host_tr_selector = vmcs_read16(HOST_TR_SELECTOR); > + > + l2_shadow_vmcs->tsc_offset = vmcs_read64(TSC_OFFSET); > + l2_shadow_vmcs->guest_physical_address = > + vmcs_read64(GUEST_PHYSICAL_ADDRESS); > + l2_shadow_vmcs->vmcs_link_pointer = vmcs_read64(VMCS_LINK_POINTER); > + l2_shadow_vmcs->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); > + if (vmcs_config.vmentry_ctrl& VM_ENTRY_LOAD_IA32_PAT) > + l2_shadow_vmcs->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); > + l2_shadow_vmcs->cr3_target_count = vmcs_read32(CR3_TARGET_COUNT); > + l2_shadow_vmcs->vm_entry_intr_info_field = > + vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); > + l2_shadow_vmcs->vm_entry_exception_error_code = > + vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE); > + l2_shadow_vmcs->vm_entry_instruction_len = > + vmcs_read32(VM_ENTRY_INSTRUCTION_LEN); > + l2_shadow_vmcs->vm_instruction_error = > + vmcs_read32(VM_INSTRUCTION_ERROR); > + l2_shadow_vmcs->vm_exit_reason = vmcs_read32(VM_EXIT_REASON); > + l2_shadow_vmcs->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); > + l2_shadow_vmcs->vm_exit_intr_error_code = > + vmcs_read32(VM_EXIT_INTR_ERROR_CODE); > + l2_shadow_vmcs->idt_vectoring_info_field = > + vmcs_read32(IDT_VECTORING_INFO_FIELD); > + l2_shadow_vmcs->idt_vectoring_error_code = > + vmcs_read32(IDT_VECTORING_ERROR_CODE); > + l2_shadow_vmcs->vm_exit_instruction_len = > + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); > + l2_shadow_vmcs->vmx_instruction_info = > + vmcs_read32(VMX_INSTRUCTION_INFO); > + l2_shadow_vmcs->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT); > + l2_shadow_vmcs->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT); > + l2_shadow_vmcs->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT); > + l2_shadow_vmcs->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT); > + l2_shadow_vmcs->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT); > + l2_shadow_vmcs->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT); > + l2_shadow_vmcs->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT); > + l2_shadow_vmcs->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT); > + l2_shadow_vmcs->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT); > + l2_shadow_vmcs->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT); > + l2_shadow_vmcs->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES); > + l2_shadow_vmcs->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); > + l2_shadow_vmcs->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); > + l2_shadow_vmcs->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES); > + l2_shadow_vmcs->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES); > + l2_shadow_vmcs->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES); > + l2_shadow_vmcs->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES); > + l2_shadow_vmcs->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES); > + l2_shadow_vmcs->guest_interruptibility_info = > + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); > + l2_shadow_vmcs->guest_activity_state = > + vmcs_read32(GUEST_ACTIVITY_STATE); > + l2_shadow_vmcs->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); > + > + l1_shadow_vmcs->host_ia32_sysenter_cs = > + vmcs_read32(HOST_IA32_SYSENTER_CS); > + > + l2_shadow_vmcs->cr0_read_shadow = vmcs_readl(CR0_READ_SHADOW); > + l2_shadow_vmcs->cr4_read_shadow = vmcs_readl(CR4_READ_SHADOW); > + l2_shadow_vmcs->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); > + l2_shadow_vmcs->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS); > + l2_shadow_vmcs->guest_cr0 = vmcs_readl(GUEST_CR0); > + > + l2_shadow_vmcs->guest_cr4 = vmcs_readl(GUEST_CR4); > + l2_shadow_vmcs->guest_es_base = vmcs_readl(GUEST_ES_BASE); > + l2_shadow_vmcs->guest_cs_base = vmcs_readl(GUEST_CS_BASE); > + l2_shadow_vmcs->guest_ss_base = vmcs_readl(GUEST_SS_BASE); > + l2_shadow_vmcs->guest_ds_base = vmcs_readl(GUEST_DS_BASE); > + l2_shadow_vmcs->guest_fs_base = vmcs_readl(GUEST_FS_BASE); > + l2_shadow_vmcs->guest_gs_base = vmcs_readl(GUEST_GS_BASE); > + l2_shadow_vmcs->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE); > + l2_shadow_vmcs->guest_tr_base = vmcs_readl(GUEST_TR_BASE); > + l2_shadow_vmcs->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE); > + l2_shadow_vmcs->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE); > + l2_shadow_vmcs->guest_dr7 = vmcs_readl(GUEST_DR7); > + l2_shadow_vmcs->guest_rsp = vmcs_readl(GUEST_RSP); > + l2_shadow_vmcs->guest_rip = vmcs_readl(GUEST_RIP); > + l2_shadow_vmcs->guest_rflags = vmcs_readl(GUEST_RFLAGS); > + l2_shadow_vmcs->guest_pending_dbg_exceptions = > + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); > + l2_shadow_vmcs->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); > + l2_shadow_vmcs->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); > + > + l1_shadow_vmcs->host_cr0 = vmcs_readl(HOST_CR0); > + l1_shadow_vmcs->host_cr3 = vmcs_readl(HOST_CR3); > + l1_shadow_vmcs->host_cr4 = vmcs_readl(HOST_CR4); > + l1_shadow_vmcs->host_fs_base = vmcs_readl(HOST_FS_BASE); > + l1_shadow_vmcs->host_gs_base = vmcs_readl(HOST_GS_BASE); > + l1_shadow_vmcs->host_tr_base = vmcs_readl(HOST_TR_BASE); > + l1_shadow_vmcs->host_gdtr_base = vmcs_readl(HOST_GDTR_BASE); > + l1_shadow_vmcs->host_idtr_base = vmcs_readl(HOST_IDTR_BASE); > + l1_shadow_vmcs->host_ia32_sysenter_esp = > + vmcs_readl(HOST_IA32_SYSENTER_ESP); > + l1_shadow_vmcs->host_ia32_sysenter_eip = > + vmcs_readl(HOST_IA32_SYSENTER_EIP); > + l1_shadow_vmcs->host_rsp = vmcs_readl(HOST_RSP); > + l1_shadow_vmcs->host_rip = vmcs_readl(HOST_RIP); > +} > Can't we do it lazily? Only read these on demand? > + > +int load_vmcs_common(struct shadow_vmcs *src) > +{ > + vmcs_write16(GUEST_ES_SELECTOR, src->guest_es_selector); > + vmcs_write16(GUEST_CS_SELECTOR, src->guest_cs_selector); > + vmcs_write16(GUEST_SS_SELECTOR, src->guest_ss_selector); > + vmcs_write16(GUEST_DS_SELECTOR, src->guest_ds_selector); > + vmcs_write16(GUEST_FS_SELECTOR, src->guest_fs_selector); > + vmcs_write16(GUEST_GS_SELECTOR, src->guest_gs_selector); > + vmcs_write16(GUEST_LDTR_SELECTOR, src->guest_ldtr_selector); > + vmcs_write16(GUEST_TR_SELECTOR, src->guest_tr_selector); > + > + vmcs_write64(VMCS_LINK_POINTER, src->vmcs_link_pointer); > + vmcs_write64(GUEST_IA32_DEBUGCTL, src->guest_ia32_debugctl); > + > + if (vmcs_config.vmentry_ctrl& VM_ENTRY_LOAD_IA32_PAT) > + vmcs_write64(GUEST_IA32_PAT, src->guest_ia32_pat); > + > + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, src->vm_entry_msr_load_count); > + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, src->vm_entry_intr_info_field); > + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, > + src->vm_entry_exception_error_code); > + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, src->vm_entry_instruction_len); > + > + vmcs_write32(GUEST_ES_LIMIT, src->guest_es_limit); > + vmcs_write32(GUEST_CS_LIMIT, src->guest_cs_limit); > + vmcs_write32(GUEST_SS_LIMIT, src->guest_ss_limit); > + vmcs_write32(GUEST_DS_LIMIT, src->guest_ds_limit); > + vmcs_write32(GUEST_FS_LIMIT, src->guest_fs_limit); > + vmcs_write32(GUEST_GS_LIMIT, src->guest_gs_limit); > + vmcs_write32(GUEST_LDTR_LIMIT, src->guest_ldtr_limit); > + vmcs_write32(GUEST_TR_LIMIT, src->guest_tr_limit); > + vmcs_write32(GUEST_GDTR_LIMIT, src->guest_gdtr_limit); > + vmcs_write32(GUEST_IDTR_LIMIT, src->guest_idtr_limit); > + vmcs_write32(GUEST_ES_AR_BYTES, src->guest_es_ar_bytes); > + vmcs_write32(GUEST_CS_AR_BYTES, src->guest_cs_ar_bytes); > + vmcs_write32(GUEST_SS_AR_BYTES, src->guest_ss_ar_bytes); > + vmcs_write32(GUEST_DS_AR_BYTES, src->guest_ds_ar_bytes); > + vmcs_write32(GUEST_FS_AR_BYTES, src->guest_fs_ar_bytes); > + vmcs_write32(GUEST_GS_AR_BYTES, src->guest_gs_ar_bytes); > + vmcs_write32(GUEST_LDTR_AR_BYTES, src->guest_ldtr_ar_bytes); > + vmcs_write32(GUEST_TR_AR_BYTES, src->guest_tr_ar_bytes); > + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, > + src->guest_interruptibility_info); > + vmcs_write32(GUEST_ACTIVITY_STATE, src->guest_activity_state); > + vmcs_write32(GUEST_SYSENTER_CS, src->guest_sysenter_cs); > + > + vmcs_writel(GUEST_ES_BASE, src->guest_es_base); > + vmcs_writel(GUEST_CS_BASE, src->guest_cs_base); > + vmcs_writel(GUEST_SS_BASE, src->guest_ss_base); > + vmcs_writel(GUEST_DS_BASE, src->guest_ds_base); > + vmcs_writel(GUEST_FS_BASE, src->guest_fs_base); > + vmcs_writel(GUEST_GS_BASE, src->guest_gs_base); > + vmcs_writel(GUEST_LDTR_BASE, src->guest_ldtr_base); > + vmcs_writel(GUEST_TR_BASE, src->guest_tr_base); > + vmcs_writel(GUEST_GDTR_BASE, src->guest_gdtr_base); > + vmcs_writel(GUEST_IDTR_BASE, src->guest_idtr_base); > + vmcs_writel(GUEST_DR7, src->guest_dr7); > + vmcs_writel(GUEST_RSP, src->guest_rsp); > + vmcs_writel(GUEST_RIP, src->guest_rip); > + vmcs_writel(GUEST_RFLAGS, src->guest_rflags); > + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, > + src->guest_pending_dbg_exceptions); > + vmcs_writel(GUEST_SYSENTER_ESP, src->guest_sysenter_esp); > + vmcs_writel(GUEST_SYSENTER_EIP, src->guest_sysenter_eip); > + > + return 0; > +} > If we do it lazily, we'll only need to reload bits that have changed. > struct level_state *create_state(void) > { > struct level_state *state = NULL; > @@ -5176,6 +5615,685 @@ int create_l2_state(struct kvm_vcpu *vcpu) > > return 0; > } > +int prepare_vmcs_02(struct kvm_vcpu *vcpu) > +{ > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + struct shadow_vmcs *src = vmx->nested.l2_state->shadow_vmcs; > + u32 exec_control; > + > + if (!src) { > + printk(KERN_INFO "%s: Error no shadow vmcs\n", __func__); > + return 1; > + } > + > + load_vmcs_common(src); > + > + if (cpu_has_vmx_vpid()&& vmx->nested.l2_state->vpid != 0) > + vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.l2_state->vpid); > + > + if (vmx->nested.l2_state->io_bitmap_a) > + vmcs_write64(IO_BITMAP_A, vmx->nested.l2_state->io_bitmap_a); > + > + if (vmx->nested.l2_state->io_bitmap_b) > + vmcs_write64(IO_BITMAP_B, vmx->nested.l2_state->io_bitmap_b); > + > + if (vmx->nested.l2_state->msr_bitmap) > + vmcs_write64(MSR_BITMAP, vmx->nested.l2_state->msr_bitmap); > Don't we need to combine the host and guest msr bitmaps and I/O bitmaps? If the host doesn't allow an msr or I/O access to the guest, it shouldn't allow it to nested guests. > + > + if (src->vm_entry_msr_load_count> 0) { > + struct page *page; > + > + page = nested_get_page(vcpu, > + src->vm_entry_msr_load_addr); > + if (!page) > + return 1; > + > + vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, page_to_phys(page)); > Luckily we don't use the msr autoload stuff. If we did we'd have to merge it too. But We have to emulate those loads (via vmx_set_msr), the guest can easily load bad msrs which would kill the host. > + if (src->virtual_apic_page_addr != 0) { > + struct page *page; > + > + page = nested_get_page(vcpu, > + src->virtual_apic_page_addr); > + if (!page) > + return 1; > + > + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, page_to_phys(page)); > + > + kvm_release_page_clean(page); > + } else { > + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, > + src->virtual_apic_page_addr); > + } > Don't understand the special zero value. > + > + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, > + (vmx->nested.l1_state->shadow_vmcs->pin_based_vm_exec_control | > + src->pin_based_vm_exec_control)); > + > + exec_control = vmx->nested.l1_state->shadow_vmcs->cpu_based_vm_exec_control; > + > + exec_control&= ~CPU_BASED_VIRTUAL_INTR_PENDING; > + > + exec_control&= ~CPU_BASED_VIRTUAL_NMI_PENDING; > + > + exec_control&= ~CPU_BASED_TPR_SHADOW; > Why? > + if (enable_vpid) { > + if (vmx->nested.l2_state->vpid == 0) { > + allocate_vpid(vmx); > + vmx->nested.l2_state->vpid = vmx->vpid; > What if the guest has a nonzero vpid? > +static int nested_vmx_vmexit(struct kvm_vcpu *vcpu, > + bool is_interrupt) > +{ > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + int initial_pfu_active = vcpu->fpu_active; > + > + if (!vmx->nested.nested_mode) { > + printk(KERN_INFO "WARNING: %s called but not in nested mode\n", > + __func__); > + return 0; > + } > + > + save_msrs(vmx->guest_msrs, vmx->save_nmsrs); > + > + sync_cached_regs_to_vmcs(vcpu); > + > + prepare_vmcs_12(vcpu); > + if (is_interrupt) > + vmx->nested.l2_state->shadow_vmcs->vm_exit_reason = > + EXIT_REASON_EXTERNAL_INTERRUPT; > Need to auto-ack the interrupt if requested by the guest. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain.