From: "Yang, Sheng"
Subject: Re: [PATCH 8/8] KVM: VMX: Enable EPT feature for KVM
Date: Mon, 28 Apr 2008 12:46:57 +0800
To: Avi Kivity
Cc: kvm-devel
Message-ID: <200804281246.57671.sheng.yang@intel.com>
In-Reply-To: <4814496B.2080101@qumranet.com>

On Sunday 27 April 2008 17:37:47 Avi Kivity wrote:
> > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> > index 3dbedf1..7a8640a 100644
> > --- a/arch/x86/kvm/mmu.c
> > +++ b/arch/x86/kvm/mmu.c
> > @@ -1177,8 +1177,15 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
> >  			return -ENOMEM;
> >  		}
> >
> > -		table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
> > -			| PT_WRITABLE_MASK | shadow_user_mask;
> > +		if (shadow_user_mask)
> > +			table[index] = __pa(new_table->spt)
> > +				| PT_PRESENT_MASK | PT_WRITABLE_MASK
> > +				| shadow_user_mask;
> > +		else
> > +			table[index] = __pa(new_table->spt)
> > +				| PT_PRESENT_MASK | PT_WRITABLE_MASK
> > +				| shadow_x_mask;
>
> Why not do, unconditionally:
>
> +	table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
> +		| PT_WRITABLE_MASK | shadow_user_mask | shadow_x_mask;
>
> ?

In fact, I just don't like that shadow_x_mask and shadow_user_mask are
controlled by the programmer rather than by the function's own logic, but
I can't find a better way, so I have merged them now.

The updated patch also moves ept_load_pdptrs() out of the critical
section into kvm_handle_exit().
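To make the reasoning behind the unconditional OR explicit, here is a
minimal sketch. It assumes the non-EPT defaults (set via
kvm_mmu_set_mask_ptes() earlier in this series) leave shadow_x_mask at 0,
while the EPT setup in this patch passes shadow_user_mask as 0 and
shadow_x_mask as VMX_EPT_EXECUTABLE_MASK, so exactly one of the two masks
contributes bits in any given mode:

	/*
	 * shadow paging: shadow_user_mask != 0, shadow_x_mask == 0
	 * EPT:           shadow_user_mask == 0, shadow_x_mask != 0
	 * OR-ing both therefore sets exactly the extra bit the active
	 * mode needs, with no conditional.
	 */
	table[index] = __pa(new_table->spt)
		| PT_PRESENT_MASK | PT_WRITABLE_MASK
		| shadow_user_mask | shadow_x_mask;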
From c10e92efd8db22d90f71a370e2300efbd4aeb3a9 Mon Sep 17 00:00:00 2001
From: Sheng Yang
Date: Mon, 28 Apr 2008 12:24:45 +0800
Subject: [PATCH] KVM: VMX: Enable EPT feature for KVM

Signed-off-by: Sheng Yang
---
 arch/x86/kvm/mmu.c         |    5 +-
 arch/x86/kvm/vmx.c         |  229 ++++++++++++++++++++++++++++++++++++++++++--
 arch/x86/kvm/vmx.h         |    9 ++
 include/asm-x86/kvm_host.h |    1 +
 4 files changed, 234 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3dbedf1..d9344be 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1177,8 +1177,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			return -ENOMEM;
 		}
 
-		table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
-			| PT_WRITABLE_MASK | shadow_user_mask;
+		table[index] = __pa(new_table->spt)
+			| PT_PRESENT_MASK | PT_WRITABLE_MASK
+			| shadow_user_mask | shadow_x_mask;
 		}
 		table_addr = table[index] & PT64_BASE_ADDR_MASK;
 	}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index de5f615..bfe4db1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -42,7 +42,7 @@ module_param(enable_vpid, bool, 0);
 static int flexpriority_enabled = 1;
 module_param(flexpriority_enabled, bool, 0);
 
-static int enable_ept;
+static int enable_ept = 1;
 module_param(enable_ept, bool, 0);
 
 struct vmcs {
@@ -284,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
 		  : : "a"(&operand), "c"(ext) : "cc", "memory");
 }
 
+static inline void __invept(int ext, u64 eptp, gpa_t gpa)
+{
+	struct {
+		u64 eptp, gpa;
+	} operand = {eptp, gpa};
+
+	asm volatile (ASM_VMX_INVEPT
+			/* CF==1 or ZF==1 --> rc = -1 */
+			"; ja 1f ; ud2 ; 1:\n"
+			: : "a" (&operand), "c" (ext) : "cc", "memory");
+}
+
 static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
@@ -335,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
 	__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
 }
 
+static inline void ept_sync_global(void)
+{
+	if (cpu_has_vmx_invept_global())
+		__invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
+}
+
+static inline void ept_sync_context(u64 eptp)
+{
+	if (vm_need_ept()) {
+		if (cpu_has_vmx_invept_context())
+			__invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
+		else
+			ept_sync_global();
+	}
+}
+
+static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
+{
+	if (vm_need_ept()) {
+		if (cpu_has_vmx_invept_individual_addr())
+			__invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
+					eptp, gpa);
+		else
+			ept_sync_context(eptp);
+	}
+}
+
 static unsigned long vmcs_readl(unsigned long field)
 {
 	unsigned long value;
@@ -422,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 		eb |= 1u << 1;
 	if (vcpu->arch.rmode.active)
 		eb = ~0;
+	if (vm_need_ept())
+		eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
@@ -1352,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 	vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
 }
 
+static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
+{
+	if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
+		if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
+			printk(KERN_ERR "EPT: Fail to load pdptrs!\n");
+			return;
+		}
+		vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
+		vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
+		vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
+		vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
+	}
+}
+
+static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+
+static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
+					unsigned long cr0,
+					struct kvm_vcpu *vcpu)
+{
+	if (!(cr0 & X86_CR0_PG)) {
+		/* From paging/starting to nonpaging */
+		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
+			     vmcs_config.cpu_based_exec_ctrl |
+			     (CPU_BASED_CR3_LOAD_EXITING |
+			      CPU_BASED_CR3_STORE_EXITING));
+		vcpu->arch.cr0 = cr0;
+		vmx_set_cr4(vcpu, vcpu->arch.cr4);
+		*hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
+		*hw_cr0 &= ~X86_CR0_WP;
+	} else if (!is_paging(vcpu)) {
+		/* From nonpaging to paging */
+		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
+			     vmcs_config.cpu_based_exec_ctrl &
+			     ~(CPU_BASED_CR3_LOAD_EXITING |
+			       CPU_BASED_CR3_STORE_EXITING));
+		vcpu->arch.cr0 = cr0;
+		vmx_set_cr4(vcpu, vcpu->arch.cr4);
+		if (!(vcpu->arch.cr0 & X86_CR0_WP))
+			*hw_cr0 &= ~X86_CR0_WP;
+	}
+}
+
+static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
+					struct kvm_vcpu *vcpu)
+{
+	if (!is_paging(vcpu)) {
+		*hw_cr4 &= ~X86_CR4_PAE;
+		*hw_cr4 |= X86_CR4_PSE;
+	} else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
+		*hw_cr4 &= ~X86_CR4_PAE;
+}
+
 static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
+	unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
+				KVM_VM_CR0_ALWAYS_ON;
+
 	vmx_fpu_deactivate(vcpu);
 
 	if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))
@@ -1371,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	}
 #endif
 
+	if (vm_need_ept())
+		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
+
 	vmcs_writel(CR0_READ_SHADOW, cr0);
-	vmcs_writel(GUEST_CR0,
-		    (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
+	vmcs_writel(GUEST_CR0, hw_cr0);
 	vcpu->arch.cr0 = cr0;
 
 	if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
 		vmx_fpu_activate(vcpu);
 }
 
+static u64 construct_eptp(unsigned long root_hpa)
+{
+	u64 eptp;
+
+	/* TODO write the value reading from MSR */
+	eptp = VMX_EPT_DEFAULT_MT |
+		VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
+	eptp |= (root_hpa & PAGE_MASK);
+
+	return eptp;
+}
+
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+	unsigned long guest_cr3;
+	u64 eptp;
+
+	guest_cr3 = cr3;
+	if (vm_need_ept()) {
+		eptp = construct_eptp(cr3);
+		vmcs_write64(EPT_POINTER, eptp);
+		ept_sync_context(eptp);
+		ept_load_pdptrs(vcpu);
+		guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
+			VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+	}
+
 	vmx_flush_tlb(vcpu);
-	vmcs_writel(GUEST_CR3, cr3);
+	vmcs_writel(GUEST_CR3, guest_cr3);
 	if (vcpu->arch.cr0 & X86_CR0_PE)
 		vmx_fpu_deactivate(vcpu);
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-	vmcs_writel(CR4_READ_SHADOW, cr4);
-	vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ?
-		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
+	unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ?
+		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+
+	vcpu->arch.cr4 = cr4;
+	if (vm_need_ept())
+		ept_update_paging_mode_cr4(&hw_cr4, vcpu);
+
+	vmcs_writel(CR4_READ_SHADOW, cr4);
+	vmcs_writel(GUEST_CR4, hw_cr4);
 }
 
 static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -2116,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 	if (is_page_fault(intr_info)) {
+		/* EPT won't cause page fault directly */
+		if (vm_need_ept())
+			BUG();
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
 		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
 			    (u32)((u64)cr2 >> 32), handler);
@@ -2445,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return kvm_task_switch(vcpu, tss_selector, reason);
 }
 
+static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	u64 exit_qualification;
+	enum emulation_result er;
+	gpa_t gpa;
+	unsigned long hva;
+	int gla_validity;
+	int r;
+
+	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+
+	if (exit_qualification & (1 << 6)) {
+		printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
+		return -ENOTSUPP;
+	}
+
+	gla_validity = (exit_qualification >> 7) & 0x3;
+	if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
+		printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
+		printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
+			(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
+			(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
+		printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
+			(long unsigned int)exit_qualification);
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = 0;
+		return -ENOTSUPP;
+	}
+
+	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+	hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
+	if (!kvm_is_error_hva(hva)) {
+		r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
+		if (r < 0) {
+			printk(KERN_ERR "EPT: Not enough memory!\n");
+			return -ENOMEM;
+		}
+		return 1;
+	} else {
+		/* must be MMIO */
+		er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
+
+		if (er == EMULATE_FAIL) {
+			printk(KERN_ERR
+			       "EPT: Fail to handle EPT violation vmexit!er is %d\n",
+			       er);
+			printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
+				(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
+				(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
+			printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
+				(long unsigned int)exit_qualification);
+			return -ENOTSUPP;
+		} else if (er == EMULATE_DO_MMIO)
+			return 0;
+	}
+	return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -2468,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
+	[EXIT_REASON_EPT_VIOLATION]           = handle_ept_violation,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -2486,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
 		    (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
 
+	/* Accessing CR3 doesn't cause a VM exit when paging is enabled,
+	 * so we need to sync with the guest's real CR3.
+	 */
+	if (vm_need_ept() && is_paging(vcpu)) {
+		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+		ept_load_pdptrs(vcpu);
+	}
+
 	if (unlikely(vmx->fail)) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2494,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	}
 
 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
-	    exit_reason != EXIT_REASON_EXCEPTION_NMI)
+	    (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
+	     exit_reason != EXIT_REASON_EPT_VIOLATION))
 		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
 		       "exit reason is 0x%x\n", __func__, exit_reason);
 	if (exit_reason < kvm_vmx_max_exit_handlers
@@ -2796,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		return ERR_PTR(-ENOMEM);
 
 	allocate_vpid(vmx);
+	if (id == 0 && vm_need_ept()) {
+		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
+			VMX_EPT_WRITABLE_MASK |
+			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+		kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
+			VMX_EPT_FAKE_DIRTY_MASK, 0ull,
+			VMX_EPT_EXECUTABLE_MASK);
+		kvm_enable_tdp();
+	}
 
 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
 	if (err)
@@ -2975,9 +3183,14 @@ static int __init vmx_init(void)
 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
 
+	if (cpu_has_vmx_ept())
+		bypass_guest_pf = 0;
+
 	if (bypass_guest_pf)
 		kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
 
+	ept_sync_global();
+
 	return 0;
 
out2:
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index f97eccc..79d94c6 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -353,6 +353,15 @@ enum vmcs_field {
 #define VMX_EPT_EXTENT_CONTEXT_BIT	(1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT	(1ull << 26)
 #define VMX_EPT_DEFAULT_GAW		3
+#define VMX_EPT_MAX_GAW			0x4
+#define VMX_EPT_MT_EPTE_SHIFT		3
+#define VMX_EPT_GAW_EPTP_SHIFT		3
+#define VMX_EPT_DEFAULT_MT		0x6ull
+#define VMX_EPT_READABLE_MASK		0x1ull
+#define VMX_EPT_WRITABLE_MASK		0x2ull
+#define VMX_EPT_EXECUTABLE_MASK		0x4ull
+#define VMX_EPT_FAKE_ACCESSED_MASK	(1ull << 62)
+#define VMX_EPT_FAKE_DIRTY_MASK		(1ull << 63)
 
 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR	0xfffbc000ul
 
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index f03ef75..54a8f77 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -650,6 +650,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
 #define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
 #define ASM_VMX_VMXOFF            ".byte 0x0f, 0x01, 0xc4"
 #define ASM_VMX_VMXON_RAX         ".byte 0xf3, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_INVEPT            ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
 #define ASM_VMX_INVVPID           ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
 
 #define MSR_IA32_TIME_STAMP_COUNTER	0x010
-- 
1.5.4.5
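As a quick sanity check of construct_eptp() against the constants added to
vmx.h above: VMX_EPT_DEFAULT_MT is 6 (write-back) and VMX_EPT_DEFAULT_GAW
is 3 (a 4-level page walk, encoded as walk length minus 1), shifted left by
VMX_EPT_GAW_EPTP_SHIFT (3). The low bits of the EPTP therefore come out as
0x6 | (3 << 3) = 0x1e before the page-aligned root is OR'd in. For a
hypothetical root_hpa of 0x12345000 the result would be:

	u64 eptp = construct_eptp(0x12345000ul);	/* == 0x1234501e */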