From: Avi Kivity <avi@redhat.com>
To: oritw@il.ibm.com
Cc: kvm@vger.kernel.org, benami@il.ibm.com, muli@il.ibm.com,
abelg@il.ibm.com, aliguori@us.ibm.com, mmday@us.ibm.com
Subject: Re: [PATCH 3/6] Nested VMX patch 3 implements vmptrld and vmptrst
Date: Wed, 02 Sep 2009 23:05:09 +0300 [thread overview]
Message-ID: <4A9ECFF5.60701@redhat.com> (raw)
In-Reply-To: <1251905916-2834-4-git-send-email-oritw@il.ibm.com>
On 09/02/2009 06:38 PM, oritw@il.ibm.com wrote:
> +struct __attribute__ ((__packed__)) level_state {
> + struct shadow_vmcs *shadow_vmcs;
> +
> + u16 vpid;
> + u64 shadow_efer;
> + unsigned long cr2;
> + unsigned long cr3;
> + unsigned long cr4;
> + unsigned long cr8;
> +
> + u64 io_bitmap_a;
> + u64 io_bitmap_b;
> + u64 msr_bitmap;
> +
> + struct vmcs *vmcs;
> + int cpu;
> + int launched;
> +};
>
> +
> struct vmcs {
> u32 revision_id;
> u32 abort;
> @@ -72,6 +217,17 @@ struct nested_vmx {
> bool vmon;
> /* Has the level1 guest done vmclear? */
> bool vmclear;
> + /* What is the location of the vmcs l1 keeps for l2? (in level1 gpa) */
> + u64 l1_cur_vmcs;
>
This is the vmptr (exactly as loaded by vmptrld), right? If so, please
call it vmptr.
> + /*
> + * Level 2 state : includes vmcs,registers and
> + * a copy of vmcs12 for vmread/vmwrite
> + */
> + struct level_state *l2_state;
> +
> + /* Level 1 state for switching to level 2 and back */
> + struct level_state *l1_state;
>
Can you explain why we need two of them? in the guest vmcs we have host
and guest values, and in l1_state and l2_state we have more copies, and
in struct vcpu we have yet another set of copies. We also have a couple
of copies in the host vmcs. I'm getting dizzy...
> static int init_rmode(struct kvm *kvm);
> static u64 construct_eptp(unsigned long root_hpa);
>
>
>
> +int read_guest_vmcs_gpa(struct kvm_vcpu *vcpu, u64 *gentry)
> +{
> + gpa_t gpa;
> + struct page *page;
> + int r = 0;
> +
> + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
> +
> + /* checking guest gpa */
> + page = gfn_to_page(vcpu->kvm, gpa>> PAGE_SHIFT);
> + if (is_error_page(page)) {
> + printk(KERN_ERR "%s Invalid guest vmcs addr %llx\n",
> + __func__, gpa);
> + r = 1;
> + goto out;
> + }
> +
> + r = kvm_read_guest(vcpu->kvm, gpa, gentry, sizeof(u64));
> + if (r) {
> + printk(KERN_ERR "%s cannot read guest vmcs addr %llx : %d\n",
> + __func__, gpa, r);
> + goto out;
> + }
>
You can use kvm_read_guest_virt() to simplify this.
> +
> + if (!IS_ALIGNED(*gentry, PAGE_SIZE)) {
> + printk(KERN_DEBUG "%s addr %llx not aligned\n",
> + __func__, *gentry);
> + return 1;
> + }
> +
> +out:
> + kvm_release_page_clean(page);
> + return r;
> +}
> +
> +static int handle_vmptrld(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + struct page *vmcs_page;
> + u64 guest_vmcs_addr;
> +
> + if (!nested_vmx_check_permission(vcpu))
> + return 1;
> +
> + if (read_guest_vmcs_gpa(vcpu,&guest_vmcs_addr))
> + return 1;
> +
> + if (create_l1_state(vcpu)) {
> + printk(KERN_ERR "%s create_l1_state failed\n", __func__);
> + return 1;
> + }
> +
> + if (create_l2_state(vcpu)) {
> + printk(KERN_ERR "%s create_l2_state failed\n", __func__);
> + return 1;
> + }
> +
> + vmx->nested.l2_state->vmcs = alloc_vmcs();
> + if (!vmx->nested.l2_state->vmcs) {
> + printk(KERN_ERR "%s error in creating level 2 vmcs", __func__);
> + return 1;
> + }
> +
> + if (vmx->nested.l1_cur_vmcs != guest_vmcs_addr) {
> + vmcs_page = nested_get_page(vcpu, guest_vmcs_addr);
> + if (vmcs_page == NULL)
> + return 1;
> +
> + /* load nested vmcs to processor */
> + if (vmptrld(vcpu, page_to_phys(vmcs_page))) {
>
So, you're loading a guest page as the vmcs. This is dangerous as the
guest can play with it. Much better to use inaccessible memory (and you
do alloc_vmcs() earlier?)
> +
> +static int handle_vmptrst(struct kvm_vcpu *vcpu)
> +{
> + if (!nested_vmx_check_permission(vcpu))
> + return 1;
> +
> + vcpu->arch.regs[VCPU_REGS_RAX] = to_vmx(vcpu)->nested.l1_cur_vmcs;
>
Should store to mem64 according to the docs?
Better done through the emulator.
> +void save_vmcs(struct shadow_vmcs *dst)
> +{
> + dst->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
> + dst->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
> + dst->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
> + dst->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
> + dst->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
> + dst->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
> + dst->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
> + dst->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
> + dst->host_es_selector = vmcs_read16(HOST_ES_SELECTOR);
> + dst->host_cs_selector = vmcs_read16(HOST_CS_SELECTOR);
> + dst->host_ss_selector = vmcs_read16(HOST_SS_SELECTOR);
> + dst->host_ds_selector = vmcs_read16(HOST_DS_SELECTOR);
> + dst->host_fs_selector = vmcs_read16(HOST_FS_SELECTOR);
> + dst->host_gs_selector = vmcs_read16(HOST_GS_SELECTOR);
> + dst->host_tr_selector = vmcs_read16(HOST_TR_SELECTOR);
> + dst->io_bitmap_a = vmcs_read64(IO_BITMAP_A);
> + dst->io_bitmap_b = vmcs_read64(IO_BITMAP_B);
> + if (cpu_has_vmx_msr_bitmap())
> + dst->msr_bitmap = vmcs_read64(MSR_BITMAP);
> +
> + dst->vm_exit_msr_store_addr = vmcs_read64(VM_EXIT_MSR_STORE_ADDR);
> + dst->vm_exit_msr_load_addr = vmcs_read64(VM_EXIT_MSR_LOAD_ADDR);
> + dst->vm_entry_msr_load_addr = vmcs_read64(VM_ENTRY_MSR_LOAD_ADDR);
> + dst->tsc_offset = vmcs_read64(TSC_OFFSET);
> + dst->virtual_apic_page_addr = vmcs_read64(VIRTUAL_APIC_PAGE_ADDR);
> + dst->apic_access_addr = vmcs_read64(APIC_ACCESS_ADDR);
> + if (enable_ept)
> + dst->ept_pointer = vmcs_read64(EPT_POINTER);
> +
> + dst->guest_physical_address = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
> + dst->vmcs_link_pointer = vmcs_read64(VMCS_LINK_POINTER);
> + dst->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
> + if (vmcs_config.vmentry_ctrl& VM_ENTRY_LOAD_IA32_PAT)
> + dst->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
> + if (enable_ept) {
> + dst->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
> + dst->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
> + dst->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
> + dst->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
> + }
> + dst->pin_based_vm_exec_control = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
> + dst->cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
> + dst->exception_bitmap = vmcs_read32(EXCEPTION_BITMAP);
> + dst->page_fault_error_code_mask =
> + vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK);
> + dst->page_fault_error_code_match =
> + vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH);
> + dst->cr3_target_count = vmcs_read32(CR3_TARGET_COUNT);
> + dst->vm_exit_controls = vmcs_read32(VM_EXIT_CONTROLS);
> + dst->vm_exit_msr_store_count = vmcs_read32(VM_EXIT_MSR_STORE_COUNT);
> + dst->vm_exit_msr_load_count = vmcs_read32(VM_EXIT_MSR_LOAD_COUNT);
> + dst->vm_entry_controls = vmcs_read32(VM_ENTRY_CONTROLS);
> + dst->vm_entry_msr_load_count = vmcs_read32(VM_ENTRY_MSR_LOAD_COUNT);
> + dst->vm_entry_intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
> + dst->vm_entry_exception_error_code =
> + vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE);
> + dst->vm_entry_instruction_len = vmcs_read32(VM_ENTRY_INSTRUCTION_LEN);
> + dst->tpr_threshold = vmcs_read32(TPR_THRESHOLD);
> + dst->secondary_vm_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> + if (enable_vpid&& dst->secondary_vm_exec_control&
> + SECONDARY_EXEC_ENABLE_VPID)
> + dst->virtual_processor_id = vmcs_read16(VIRTUAL_PROCESSOR_ID);
> + dst->vm_instruction_error = vmcs_read32(VM_INSTRUCTION_ERROR);
> + dst->vm_exit_reason = vmcs_read32(VM_EXIT_REASON);
> + dst->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
> + dst->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
> + dst->idt_vectoring_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
> + dst->idt_vectoring_error_code = vmcs_read32(IDT_VECTORING_ERROR_CODE);
> + dst->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
> + dst->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> + dst->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
> + dst->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
> + dst->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
> + dst->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
> + dst->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
> + dst->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
> + dst->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
> + dst->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
> + dst->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
> + dst->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
> + dst->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
> + dst->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
> + dst->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
> + dst->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
> + dst->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
> + dst->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
> + dst->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
> + dst->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
> + dst->guest_interruptibility_info =
> + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
> + dst->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE);
> + dst->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
> + dst->host_ia32_sysenter_cs = vmcs_read32(HOST_IA32_SYSENTER_CS);
> + dst->cr0_guest_host_mask = vmcs_readl(CR0_GUEST_HOST_MASK);
> + dst->cr4_guest_host_mask = vmcs_readl(CR4_GUEST_HOST_MASK);
> + dst->cr0_read_shadow = vmcs_readl(CR0_READ_SHADOW);
> + dst->cr4_read_shadow = vmcs_readl(CR4_READ_SHADOW);
> + dst->cr3_target_value0 = vmcs_readl(CR3_TARGET_VALUE0);
> + dst->cr3_target_value1 = vmcs_readl(CR3_TARGET_VALUE1);
> + dst->cr3_target_value2 = vmcs_readl(CR3_TARGET_VALUE2);
> + dst->cr3_target_value3 = vmcs_readl(CR3_TARGET_VALUE3);
> + dst->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> + dst->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
> + dst->guest_cr0 = vmcs_readl(GUEST_CR0);
> + dst->guest_cr3 = vmcs_readl(GUEST_CR3);
> + dst->guest_cr4 = vmcs_readl(GUEST_CR4);
> + dst->guest_es_base = vmcs_readl(GUEST_ES_BASE);
> + dst->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
> + dst->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
> + dst->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
> + dst->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
> + dst->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
> + dst->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
> + dst->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
> + dst->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
> + dst->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
> + dst->guest_dr7 = vmcs_readl(GUEST_DR7);
> + dst->guest_rsp = vmcs_readl(GUEST_RSP);
> + dst->guest_rip = vmcs_readl(GUEST_RIP);
> + dst->guest_rflags = vmcs_readl(GUEST_RFLAGS);
> + dst->guest_pending_dbg_exceptions =
> + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
> + dst->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
> + dst->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
> + dst->host_cr0 = vmcs_readl(HOST_CR0);
> + dst->host_cr3 = vmcs_readl(HOST_CR3);
> + dst->host_cr4 = vmcs_readl(HOST_CR4);
> + dst->host_fs_base = vmcs_readl(HOST_FS_BASE);
> + dst->host_gs_base = vmcs_readl(HOST_GS_BASE);
> + dst->host_tr_base = vmcs_readl(HOST_TR_BASE);
> + dst->host_gdtr_base = vmcs_readl(HOST_GDTR_BASE);
> + dst->host_idtr_base = vmcs_readl(HOST_IDTR_BASE);
> + dst->host_ia32_sysenter_esp = vmcs_readl(HOST_IA32_SYSENTER_ESP);
> + dst->host_ia32_sysenter_eip = vmcs_readl(HOST_IA32_SYSENTER_EIP);
> + dst->host_rsp = vmcs_readl(HOST_RSP);
> + dst->host_rip = vmcs_readl(HOST_RIP);
> + if (vmcs_config.vmexit_ctrl& VM_EXIT_LOAD_IA32_PAT)
> + dst->host_ia32_pat = vmcs_read64(HOST_IA32_PAT);
> +}
>
I see. You're using the processor's format when reading the guest
vmcs. But we don't have to do that, we can use the shadow_vmcs
structure (and a memcpy).
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
next prev parent reply other threads:[~2009-09-02 20:04 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-09-02 15:38 Nested VMX support - kernel v1 oritw
2009-09-02 15:38 ` [PATCH 1/6] Nested VMX patch 1 implements vmon and vmoff oritw
2009-09-02 15:38 ` [PATCH 2/6] Nested VMX patch 2 implements vmclear oritw
2009-09-02 15:38 ` [PATCH 3/6] Nested VMX patch 3 implements vmptrld and vmptrst oritw
2009-09-02 15:38 ` [PATCH 2/2] Nested VMX patch 4 implements vmread and vmwrite oritw
2009-09-02 15:38 ` [PATCH 5/6] Nested VMX patch 5 implements vmlaunch and vmresume oritw
2009-09-02 21:38 ` Avi Kivity
2009-09-03 14:53 ` Orit Wasserman
2009-09-06 9:29 ` Avi Kivity
2009-09-06 13:38 ` Orit Wasserman
2009-09-02 20:15 ` [PATCH 2/2] Nested VMX patch 4 implements vmread and vmwrite Avi Kivity
2009-09-03 14:26 ` Orit Wasserman
2009-09-02 20:05 ` Avi Kivity [this message]
2009-09-03 14:25 ` [PATCH 3/6] Nested VMX patch 3 implements vmptrld and vmptrst Orit Wasserman
2009-09-06 9:25 ` Avi Kivity
2009-09-06 13:36 ` Orit Wasserman
2009-09-06 13:52 ` Avi Kivity
2009-09-06 16:55 ` Orit Wasserman
2009-09-06 19:10 ` Avi Kivity
2009-09-02 19:38 ` [PATCH 2/6] Nested VMX patch 2 implements vmclear Avi Kivity
2009-09-03 13:54 ` Orit Wasserman
2009-09-02 19:34 ` [PATCH 1/6] Nested VMX patch 1 implements vmon and vmoff Avi Kivity
2009-09-03 12:34 ` Orit Wasserman
2009-09-03 13:39 ` Avi Kivity
2009-09-03 14:54 ` Orit Wasserman
2009-09-02 15:57 ` Nested VMX support - kernel v1 Alexander Graf
2009-09-03 6:01 ` Muli Ben-Yehuda
2009-09-03 7:29 ` Alexander Graf
2009-09-03 9:53 ` Muli Ben-Yehuda
2009-09-06 19:28 ` Anthony Liguori
2009-09-02 21:39 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4A9ECFF5.60701@redhat.com \
--to=avi@redhat.com \
--cc=abelg@il.ibm.com \
--cc=aliguori@us.ibm.com \
--cc=benami@il.ibm.com \
--cc=kvm@vger.kernel.org \
--cc=mmday@us.ibm.com \
--cc=muli@il.ibm.com \
--cc=oritw@il.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.