From mboxrd@z Thu Jan 1 00:00:00 1970 From: Avi Kivity Subject: Re: [PATCH 4/7] Nested VMX patch 4 implements vmread and vmwrite Date: Wed, 16 Dec 2009 16:44:19 +0200 Message-ID: <4B28F243.9010300@redhat.com> References: <1260470309-7166-1-git-send-email-oritw@il.ibm.com> <1260470309-7166-2-git-send-email-oritw@il.ibm.com> <1260470309-7166-3-git-send-email-oritw@il.ibm.com> <1260470309-7166-4-git-send-email-oritw@il.ibm.com> <1260470309-7166-5-git-send-email-oritw@il.ibm.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Cc: kvm@vger.kernel.org, benami@il.ibm.com, abelg@il.ibm.com, muli@il.ibm.com, aliguori@us.ibm.com, mdday@us.ibm.com To: oritw@il.ibm.com Return-path: Received: from mx1.redhat.com ([209.132.183.28]:27157 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750844AbZLPOoe (ORCPT ); Wed, 16 Dec 2009 09:44:34 -0500 In-Reply-To: <1260470309-7166-5-git-send-email-oritw@il.ibm.com> Sender: kvm-owner@vger.kernel.org List-ID: On 12/10/2009 08:38 PM, oritw@il.ibm.com wrote: > From: Orit Wasserman > > --- > arch/x86/kvm/vmx.c | 670 +++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 files changed, 660 insertions(+), 10 deletions(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 46a4f3a..8745d44 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -239,6 +239,7 @@ struct __attribute__ ((__packed__)) level_state { > struct __attribute__ ((__packed__)) nested_vmcs_page { > u32 revision_id; > u32 abort; > + struct shadow_vmcs shadow_vmcs; > struct level_state l2_state; > }; > > @@ -263,6 +264,55 @@ struct nested_vmx { > struct nested_vmcs_page *current_l2_page; > }; > > +enum vmcs_field_type { > + VMCS_FIELD_TYPE_U16 = 0, > + VMCS_FIELD_TYPE_U64 = 1, > + VMCS_FIELD_TYPE_U32 = 2, > + VMCS_FIELD_TYPE_ULONG = 3 > +}; > + > +#define VMCS_FIELD_LENGTH_OFFSET 13 > +#define VMCS_FIELD_LENGTH_MASK 0x6000 > + > +/* > + Returns VMCS Field type > +*/ > +static inline int vmcs_field_type(unsigned long field) > +{ > + /* For 32 bit L1 when it using the HIGH field */ > + if (0x1& field) > + return VMCS_FIELD_TYPE_U32; > + > + return (VMCS_FIELD_LENGTH_MASK& field)>> 13; > +} > + > +/* > + Returncs VMCS field size in bits > +*/ > +static inline int vmcs_field_size(int field_type, struct kvm_vcpu *vcpu) > +{ > + switch (field_type) { > + case VMCS_FIELD_TYPE_U16: > + return 2; > + case VMCS_FIELD_TYPE_U32: > + return 4; > + case VMCS_FIELD_TYPE_U64: > + return 8; > + case VMCS_FIELD_TYPE_ULONG: > +#ifdef CONFIG_X86_64 > + if (is_long_mode(vcpu)) > + return 8; > + else > Can replace with #endif > + return 4; > +#else > + return 4; > +#endif > ... and drop the previous three lines. > + } > + > + printk(KERN_INFO "WARNING: invalid field type %d \n", field_type); > + return 0; > Can this happen? The field is only two bits wide. > > +static inline struct shadow_vmcs *get_shadow_vmcs(struct kvm_vcpu *vcpu) > +{ > + WARN_ON(!to_vmx(vcpu)->nested.current_l2_page); > + return&(to_vmx(vcpu)->nested.current_l2_page->shadow_vmcs); > +} > + > +#define SHADOW_VMCS_OFFSET(x) offsetof(struct shadow_vmcs, x) > + > +static unsigned short vmcs_field_to_offset_table[HOST_RIP+1] = { > + > + [VIRTUAL_PROCESSOR_ID] = > + SHADOW_VMCS_OFFSET(virtual_processor_id), > Keep on one line, you can use a shorter macro name if it helps. This table is just noise. > + > +static inline unsigned short vmcs_field_to_offset(unsigned long field) > +{ > + > + if (field> HOST_RIP || vmcs_field_to_offset_table[field] == 0) { > + printk(KERN_ERR "invalid vmcs encoding 0x%lx\n", field); > + return -1; > This will be converted to 0xffff. > + } > + > + return vmcs_field_to_offset_table[field]; > +} > + > +static inline unsigned long nested_vmcs_readl(struct kvm_vcpu *vcpu, > + unsigned long field) > +{ > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + unsigned long *entry; > + > + if (!vmx->nested.current_l2_page) { > + printk(KERN_ERR "%s invalid nested vmcs\n", __func__); > + return -1; > + } > + > + entry = (unsigned long *)((char *)(get_shadow_vmcs(vcpu)) + > + vmcs_field_to_offset(field)); > Error check? > +static inline u64 nested_vmcs_read64(struct kvm_vcpu *vcpu, unsigned long field) > +{ > + struct vcpu_vmx *vmx = to_vmx(vcpu); > + u64 *entry; > + if (!vmx->nested.current_l2_page) { > + printk(KERN_ERR "%s invalid nested vmcs\n", __func__); > + return -1; > + } > + > + entry = (u64 *)((char *)(get_shadow_vmcs(vcpu)) + > + vmcs_field_to_offset(field)); > Need to support the 'high' part of 64-bit fields. > + return *entry; > +} > > + > +static inline void nested_vmcs_write64(struct kvm_vcpu *vcpu, > + unsigned long field, u64 value) > +{ > +#ifdef CONFIG_X86_64 > + nested_vmcs_writel(vcpu, field, value); > +#else /* nested: 32 bit not actually tested */ > + nested_vmcs_writel(vcpu, field, value); > + nested_vmcs_writel(vcpu, field+1, value>> 32); > +#endif > High field support needed. > static struct page *nested_get_page(struct kvm_vcpu *vcpu, > u64 vmcs_addr) > { > @@ -354,11 +809,6 @@ static int nested_map_current(struct kvm_vcpu *vcpu) > > mapped_page = kmap_atomic(vmcs_page, KM_USER0); > > - if (!mapped_page) { > - printk(KERN_INFO "%s: error in kmap_atomic\n", __func__); > - return 0; > - } > - > Fold. > vmx->nested.current_l2_page = mapped_page; > > return 1; > @@ -1390,7 +1840,7 @@ static int read_guest_vmcs_gpa(struct kvm_vcpu *vcpu, gva_t gva, u64 *gentry) > size, vcpu); > if (r) { > printk(KERN_ERR "%s cannot read guest vmcs addr %lx : %d\n", > - __func__, vcpu->arch.regs[VCPU_REGS_RAX], r); > + __func__, gva, r); > Fold. > @@ -3764,6 +4214,26 @@ static gva_t get_vmx_mem_address(struct kvm_vcpu *vcpu, > return addr; > } > > +static void set_rflags_to_vmx_fail_invalid(struct kvm_vcpu *vcpu) > +{ > + unsigned long rflags; > + rflags = vmx_get_rflags(vcpu); > + rflags |= X86_EFLAGS_CF; > + rflags&= ~X86_EFLAGS_PF& ~X86_EFLAGS_AF& ~X86_EFLAGS_ZF& > + ~X86_EFLAGS_SF& ~X86_EFLAGS_OF; > + vmx_set_rflags(vcpu, rflags); > +} > + > +static void set_rflags_to_vmx_fail_valid(struct kvm_vcpu *vcpu) > +{ > + unsigned long rflags; > + rflags = vmx_get_rflags(vcpu); > + rflags |= X86_EFLAGS_ZF; > + rflags&= ~X86_EFLAGS_PF& ~X86_EFLAGS_AF& ~X86_EFLAGS_CF& > + ~X86_EFLAGS_SF& ~X86_EFLAGS_OF; > + vmx_set_rflags(vcpu, rflags); > + } > + > These are needed much earlier. > > +static int handle_vmread_reg(struct kvm_vcpu *vcpu, int reg, > + unsigned long field) > +{ > + u64 field_value; > + > + switch (vmcs_field_type(field)) { > + case VMCS_FIELD_TYPE_U16: > + field_value = nested_vmcs_read16(vcpu, field); > + break; > + case VMCS_FIELD_TYPE_U32: > + field_value = nested_vmcs_read32(vcpu, field); > + break; > + case VMCS_FIELD_TYPE_U64: > + field_value = nested_vmcs_read64(vcpu, field); > +#ifdef CONFIG_X86_64 > + if (!is_long_mode(vcpu)) { > + kvm_register_write(vcpu, reg+1, field_value>> 32); > + field_value = (u32)field_value; > + } > +#endif > + break; > + case VMCS_FIELD_TYPE_ULONG: > + field_value = nested_vmcs_readl(vcpu, field); > +#ifdef CONFIG_X86_64 > + if (!is_long_mode(vcpu)) { > + kvm_register_write(vcpu, reg+1, field_value>> 32); > + field_value = (u32)field_value; > + } > +#endif > + break; > + default: > + printk(KERN_INFO "%s invalid field\n", __func__); > + return 0; > + } > + > + kvm_register_write(vcpu, reg, field_value); > + return 1; > +} > + > +static int handle_vmread_mem(struct kvm_vcpu *vcpu, gva_t gva, > + unsigned long field) > +{ > + u64 field_value; > + > + switch (vmcs_field_type(field)) { > + case VMCS_FIELD_TYPE_U16: > + field_value = nested_vmcs_read16(vcpu, field); > + break; > + case VMCS_FIELD_TYPE_U32: > + field_value = nested_vmcs_read32(vcpu, field); > + break; > + case VMCS_FIELD_TYPE_U64: > + field_value = nested_vmcs_read64(vcpu, field); > + break; > + case VMCS_FIELD_TYPE_ULONG: > + field_value = nested_vmcs_readl(vcpu, field); > + break; > + default: > + printk(KERN_INFO "%s invalid field\n", __func__); > + return 0; > + } > + > + kvm_write_guest_virt(gva,&field_value, > + vmcs_field_size(vmcs_field_type(field), vcpu), > + vcpu); > + return 1; > +} > Looks like a lot of code duplication. You can probably do this with a single function, and write either to a register or memory at the end. > + > +static int handle_vmread(struct kvm_vcpu *vcpu) > +{ > + unsigned long field; > + int reg; > + int is_reg; > + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); > + u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); > + gva_t gva = 0; > + int read_succeed; > + > + if (!nested_vmx_check_permission(vcpu)) > + return 1; > + > + if (!nested_map_current(vcpu)) { > + printk(KERN_INFO "%s invalid shadow vmcs\n", __func__); > + set_rflags_to_vmx_fail_invalid(vcpu); > + return 1; > + } > + > + /* decode instruction info to get the field to read and where to store its value */ > + /* Bit 10, Mem/Reg (0 = memory, 1 = register) */ > + is_reg = vmx_instruction_info& (1u<< 10); /* bit 10 */ > + field = kvm_register_read(vcpu, (vmx_instruction_info>> 28)& 0xf); /* bits 31:28 */ > + > + if (is_reg) { > + reg = (vmx_instruction_info>> 3)& 0xf; /* bits 3:6 */ > + read_succeed = handle_vmread_reg(vcpu, reg, field); > + } else { > + gva = get_vmx_mem_address(vcpu, exit_qualification, > + vmx_instruction_info); > + read_succeed = handle_vmread_mem(vcpu, gva, field); > + } > + > This, too, can go into a separate function instead of being duplicated all over. > + if (read_succeed) { > + clear_rflags_cf_zf(vcpu); > + skip_emulated_instruction(vcpu); > + } else { > + set_rflags_to_vmx_fail_valid(vcpu); > + vmcs_write32(VM_INSTRUCTION_ERROR, 12); > + } > + > + nested_unmap_current(vcpu); > + return 1; > +} > + > > > static int handle_vmoff(struct kvm_vcpu *vcpu) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > @@ -3901,15 +4546,20 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) > unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); > u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); > gva_t vmcs_gva; > - > + uint size; > if (!nested_vmx_check_permission(vcpu)) > return 1; > vmcs_gva = get_vmx_mem_address(vcpu, exit_qualification, > vmx_instruction_info); > > + if (is_long_mode(vcpu)) > + size = sizeof(u64); > + else > + size = sizeof(u32); > I think the vmpointers are always 64-bit. -- error compiling committee.c: too many arguments to function