All of lore.kernel.org
 help / color / mirror / Atom feed
From: Avi Kivity <avi@redhat.com>
To: oritw@il.ibm.com
Cc: kvm@vger.kernel.org, benami@il.ibm.com, muli@il.ibm.com,
	abelg@il.ibm.com, aliguori@us.ibm.com, mmday@us.ibm.com
Subject: Re: [PATCH 5/6] Nested VMX patch 5 implements vmlaunch and vmresume
Date: Thu, 03 Sep 2009 00:38:16 +0300	[thread overview]
Message-ID: <4A9EE5C8.8040107@redhat.com> (raw)
In-Reply-To: <1251905916-2834-6-git-send-email-oritw@il.ibm.com>

On 09/02/2009 06:38 PM, oritw@il.ibm.com wrote:
> -struct nested_vmx {
> -	/* Has the level1 guest done vmon? */
> +struct nested_vmx {	/* Has the level1 guest done vmon? */
>    

A \n died here.

>   	bool vmon;
>   	/* Has the level1 guest done vmclear? */
>   	bool vmclear;
> +
> +	/* Are we running nested guest */
> +	bool nested_mode;
> +
> +	/* L1 requested VMLAUNCH or VMRESUME but we didn't run L2 yet */
> +	bool nested_run_pending;
> +
> +	/* flag indicating if there was a valid IDT after exiting from l2 */
> +	bool nested_pending_valid_idt;
>    

What does this mean?  pending event?

>
> +
> +static inline int nested_cpu_has_vmx_tpr_shadow(struct  kvm_vcpu *vcpu)
> +{
> +	return to_vmx(vcpu)->nested.l2_state->shadow_vmcs->
> +		cpu_based_vm_exec_control&  CPU_BASED_TPR_SHADOW;
> +}
>    

Don't we need to check if the host supports it too?

> +static inline bool nested_vm_need_virtualize_apic_accesses(struct kvm_vcpu
> +							   *vcpu)
> +{
> +	struct shadow_vmcs *shadow = to_vmx(vcpu)->nested.l2_state->shadow_vmcs;
> +
> +	return (shadow->secondary_vm_exec_control&
> +		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)&&
> +		to_vmx(vcpu)->nested.l2_state->shadow_vmcs->apic_access_addr != 0;
> +}
>    

Why check apic_access_addr?

> +
> +static inline int nested_cpu_has_vmx_ept(struct kvm_vcpu *vcpu)
> +{
> +	return to_vmx(vcpu)->nested.l2_state->shadow_vmcs->
> +		secondary_vm_exec_control&  SECONDARY_EXEC_ENABLE_EPT;
> +}
>    

Need to check if secondary controls enabled?

> +static void vmx_set_irq(struct kvm_vcpu *vcpu)
> +{
> +	if (to_vmx(vcpu)->nested.nested_mode)
> +		return;
>    

Why?

Note if the guest didn't enable external interrupt exiting, we need to 
inject as usual.

>
> +static int nested_handle_pending_idt(struct kvm_vcpu *vcpu)
> +{
>    

Again the name is confusing.  pending_event_injection?

> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	int irq;
> +	int type;
> +	int errCodeValid;
> +	u32 idt_vectoring_info;
> +	u32 guest_intr;
> +	bool nmi_window_open;
> +	bool interrupt_window_open;
> +
> +	if (vmx->nested.nested_mode&&  vmx->nested.nested_pending_valid_idt) {
> +		idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
> +		irq  = idt_vectoring_info&  VECTORING_INFO_VECTOR_MASK;
> +		type = idt_vectoring_info&  VECTORING_INFO_TYPE_MASK;
> +		errCodeValid = idt_vectoring_info&
> +			VECTORING_INFO_DELIVER_CODE_MASK;
> +
> +		guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
> +		nmi_window_open =
> +			!(guest_intr&  (GUEST_INTR_STATE_STI |
> +					GUEST_INTR_STATE_MOV_SS |
> +					GUEST_INTR_STATE_NMI));
> +
> +		interrupt_window_open =
> +			((vmcs_readl(GUEST_RFLAGS)&  X86_EFLAGS_IF)&&
> +			 !(guest_intr&  (GUEST_INTR_STATE_STI |
> +					 GUEST_INTR_STATE_MOV_SS)));
> +
> +		if (type == INTR_TYPE_EXT_INTR&&  !interrupt_window_open) {
> +			printk(KERN_INFO "IDT ignored, l2 interrupt window closed!\n");
> +			return 0;
> +		}
>    

How can this happen?  Unless it's on nested entry, in which case we need 
to abort the entry.

> +
>   #ifdef CONFIG_X86_64
>   #define R "r"
>   #define Q "q"
> @@ -4646,6 +4842,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
>   {
>   	struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> +	nested_handle_pending_idt(vcpu);
>    

You're not checking the return code (need to do that on entry).

> +
> +	if (vmx->nested.nested_mode) {
> +		vmcs_writel(GUEST_CR0, vmx->nested.l2_state->shadow_vmcs->guest_cr0);
>    

Might not be legal.  We may also want to force-enable caching.  Lastly, 
don't we need to handle cr0.ts and ct0.mp specially to manage the fpu state?

>
> +	if (vmx->nested.nested_mode)
> +		vmx->nested.vmclear = 0;
> +
>    

Why?

>   free_vmcs:
> @@ -5122,6 +5339,228 @@ static int shadow_vmcs_load(struct kvm_vcpu *vcpu)
>   	return 0;
>   }
>
> +void prepare_vmcs_12(struct kvm_vcpu *vcpu)
> +{
> +	struct shadow_vmcs *l2_shadow_vmcs =
> +		to_vmx(vcpu)->nested.l2_state->shadow_vmcs;
> +	struct shadow_vmcs *l1_shadow_vmcs =
> +		to_vmx(vcpu)->nested.l1_state->shadow_vmcs;
> +
> +	l2_shadow_vmcs->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
> +	l2_shadow_vmcs->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
> +	l2_shadow_vmcs->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
> +	l2_shadow_vmcs->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
> +	l2_shadow_vmcs->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
> +	l2_shadow_vmcs->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
> +	l2_shadow_vmcs->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
> +	l2_shadow_vmcs->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
> +
> +	l1_shadow_vmcs->host_es_selector = vmcs_read16(HOST_ES_SELECTOR);
> +	l1_shadow_vmcs->host_cs_selector = vmcs_read16(HOST_CS_SELECTOR);
> +	l1_shadow_vmcs->host_ss_selector = vmcs_read16(HOST_SS_SELECTOR);
> +	l1_shadow_vmcs->host_ds_selector = vmcs_read16(HOST_DS_SELECTOR);
> +	l1_shadow_vmcs->host_fs_selector = vmcs_read16(HOST_FS_SELECTOR);
> +	l1_shadow_vmcs->host_gs_selector = vmcs_read16(HOST_GS_SELECTOR);
> +	l1_shadow_vmcs->host_tr_selector = vmcs_read16(HOST_TR_SELECTOR);
> +
> +	l2_shadow_vmcs->tsc_offset = vmcs_read64(TSC_OFFSET);
> +	l2_shadow_vmcs->guest_physical_address =
> +		vmcs_read64(GUEST_PHYSICAL_ADDRESS);
> +	l2_shadow_vmcs->vmcs_link_pointer = vmcs_read64(VMCS_LINK_POINTER);
> +	l2_shadow_vmcs->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
> +	if (vmcs_config.vmentry_ctrl&  VM_ENTRY_LOAD_IA32_PAT)
> +		l2_shadow_vmcs->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
> +	l2_shadow_vmcs->cr3_target_count = vmcs_read32(CR3_TARGET_COUNT);
> +	l2_shadow_vmcs->vm_entry_intr_info_field =
> +		vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
> +	l2_shadow_vmcs->vm_entry_exception_error_code =
> +		vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE);
> +	l2_shadow_vmcs->vm_entry_instruction_len =
> +		vmcs_read32(VM_ENTRY_INSTRUCTION_LEN);
> +	l2_shadow_vmcs->vm_instruction_error =
> +		vmcs_read32(VM_INSTRUCTION_ERROR);
> +	l2_shadow_vmcs->vm_exit_reason  = vmcs_read32(VM_EXIT_REASON);
> +	l2_shadow_vmcs->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
> +	l2_shadow_vmcs->vm_exit_intr_error_code =
> +		vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
> +	l2_shadow_vmcs->idt_vectoring_info_field =
> +		vmcs_read32(IDT_VECTORING_INFO_FIELD);
> +	l2_shadow_vmcs->idt_vectoring_error_code =
> +		vmcs_read32(IDT_VECTORING_ERROR_CODE);
> +	l2_shadow_vmcs->vm_exit_instruction_len =
> +		vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
> +	l2_shadow_vmcs->vmx_instruction_info =
> +		vmcs_read32(VMX_INSTRUCTION_INFO);
> +	l2_shadow_vmcs->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
> +	l2_shadow_vmcs->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
> +	l2_shadow_vmcs->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
> +	l2_shadow_vmcs->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
> +	l2_shadow_vmcs->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
> +	l2_shadow_vmcs->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
> +	l2_shadow_vmcs->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
> +	l2_shadow_vmcs->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
> +	l2_shadow_vmcs->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
> +	l2_shadow_vmcs->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
> +	l2_shadow_vmcs->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
> +	l2_shadow_vmcs->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
> +	l2_shadow_vmcs->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
> +	l2_shadow_vmcs->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
> +	l2_shadow_vmcs->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
> +	l2_shadow_vmcs->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
> +	l2_shadow_vmcs->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
> +	l2_shadow_vmcs->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
> +	l2_shadow_vmcs->guest_interruptibility_info =
> +		vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
> +	l2_shadow_vmcs->guest_activity_state =
> +		vmcs_read32(GUEST_ACTIVITY_STATE);
> +	l2_shadow_vmcs->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
> +
> +	l1_shadow_vmcs->host_ia32_sysenter_cs =
> +		vmcs_read32(HOST_IA32_SYSENTER_CS);
> +
> +	l2_shadow_vmcs->cr0_read_shadow = vmcs_readl(CR0_READ_SHADOW);
> +	l2_shadow_vmcs->cr4_read_shadow = vmcs_readl(CR4_READ_SHADOW);
> +	l2_shadow_vmcs->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> +	l2_shadow_vmcs->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
> +	l2_shadow_vmcs->guest_cr0 = vmcs_readl(GUEST_CR0);
> +
> +	l2_shadow_vmcs->guest_cr4 = vmcs_readl(GUEST_CR4);
> +	l2_shadow_vmcs->guest_es_base = vmcs_readl(GUEST_ES_BASE);
> +	l2_shadow_vmcs->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
> +	l2_shadow_vmcs->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
> +	l2_shadow_vmcs->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
> +	l2_shadow_vmcs->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
> +	l2_shadow_vmcs->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
> +	l2_shadow_vmcs->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
> +	l2_shadow_vmcs->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
> +	l2_shadow_vmcs->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
> +	l2_shadow_vmcs->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
> +	l2_shadow_vmcs->guest_dr7 = vmcs_readl(GUEST_DR7);
> +	l2_shadow_vmcs->guest_rsp = vmcs_readl(GUEST_RSP);
> +	l2_shadow_vmcs->guest_rip = vmcs_readl(GUEST_RIP);
> +	l2_shadow_vmcs->guest_rflags = vmcs_readl(GUEST_RFLAGS);
> +	l2_shadow_vmcs->guest_pending_dbg_exceptions =
> +		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
> +	l2_shadow_vmcs->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
> +	l2_shadow_vmcs->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
> +
> +	l1_shadow_vmcs->host_cr0 = vmcs_readl(HOST_CR0);
> +	l1_shadow_vmcs->host_cr3 = vmcs_readl(HOST_CR3);
> +	l1_shadow_vmcs->host_cr4 = vmcs_readl(HOST_CR4);
> +	l1_shadow_vmcs->host_fs_base = vmcs_readl(HOST_FS_BASE);
> +	l1_shadow_vmcs->host_gs_base = vmcs_readl(HOST_GS_BASE);
> +	l1_shadow_vmcs->host_tr_base = vmcs_readl(HOST_TR_BASE);
> +	l1_shadow_vmcs->host_gdtr_base = vmcs_readl(HOST_GDTR_BASE);
> +	l1_shadow_vmcs->host_idtr_base = vmcs_readl(HOST_IDTR_BASE);
> +	l1_shadow_vmcs->host_ia32_sysenter_esp =
> +		vmcs_readl(HOST_IA32_SYSENTER_ESP);
> +	l1_shadow_vmcs->host_ia32_sysenter_eip =
> +		vmcs_readl(HOST_IA32_SYSENTER_EIP);
> +	l1_shadow_vmcs->host_rsp = vmcs_readl(HOST_RSP);
> +	l1_shadow_vmcs->host_rip = vmcs_readl(HOST_RIP);
> +}
>    

Can't we do it lazily?  Only read these on demand?

> +
> +int load_vmcs_common(struct shadow_vmcs *src)
> +{
> +	vmcs_write16(GUEST_ES_SELECTOR, src->guest_es_selector);
> +	vmcs_write16(GUEST_CS_SELECTOR, src->guest_cs_selector);
> +	vmcs_write16(GUEST_SS_SELECTOR, src->guest_ss_selector);
> +	vmcs_write16(GUEST_DS_SELECTOR, src->guest_ds_selector);
> +	vmcs_write16(GUEST_FS_SELECTOR, src->guest_fs_selector);
> +	vmcs_write16(GUEST_GS_SELECTOR, src->guest_gs_selector);
> +	vmcs_write16(GUEST_LDTR_SELECTOR, src->guest_ldtr_selector);
> +	vmcs_write16(GUEST_TR_SELECTOR, src->guest_tr_selector);
> +
> +	vmcs_write64(VMCS_LINK_POINTER, src->vmcs_link_pointer);
> +	vmcs_write64(GUEST_IA32_DEBUGCTL, src->guest_ia32_debugctl);
> +
> +	if (vmcs_config.vmentry_ctrl&  VM_ENTRY_LOAD_IA32_PAT)
> +		vmcs_write64(GUEST_IA32_PAT, src->guest_ia32_pat);
> +
> +	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, src->vm_entry_msr_load_count);
> +	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, src->vm_entry_intr_info_field);
> +	vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
> +		     src->vm_entry_exception_error_code);
> +	vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, src->vm_entry_instruction_len);
> +
> +	vmcs_write32(GUEST_ES_LIMIT, src->guest_es_limit);
> +	vmcs_write32(GUEST_CS_LIMIT, src->guest_cs_limit);
> +	vmcs_write32(GUEST_SS_LIMIT, src->guest_ss_limit);
> +	vmcs_write32(GUEST_DS_LIMIT, src->guest_ds_limit);
> +	vmcs_write32(GUEST_FS_LIMIT, src->guest_fs_limit);
> +	vmcs_write32(GUEST_GS_LIMIT, src->guest_gs_limit);
> +	vmcs_write32(GUEST_LDTR_LIMIT, src->guest_ldtr_limit);
> +	vmcs_write32(GUEST_TR_LIMIT, src->guest_tr_limit);
> +	vmcs_write32(GUEST_GDTR_LIMIT, src->guest_gdtr_limit);
> +	vmcs_write32(GUEST_IDTR_LIMIT, src->guest_idtr_limit);
> +	vmcs_write32(GUEST_ES_AR_BYTES, src->guest_es_ar_bytes);
> +	vmcs_write32(GUEST_CS_AR_BYTES, src->guest_cs_ar_bytes);
> +	vmcs_write32(GUEST_SS_AR_BYTES, src->guest_ss_ar_bytes);
> +	vmcs_write32(GUEST_DS_AR_BYTES, src->guest_ds_ar_bytes);
> +	vmcs_write32(GUEST_FS_AR_BYTES, src->guest_fs_ar_bytes);
> +	vmcs_write32(GUEST_GS_AR_BYTES, src->guest_gs_ar_bytes);
> +	vmcs_write32(GUEST_LDTR_AR_BYTES, src->guest_ldtr_ar_bytes);
> +	vmcs_write32(GUEST_TR_AR_BYTES, src->guest_tr_ar_bytes);
> +	vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
> +		     src->guest_interruptibility_info);
> +	vmcs_write32(GUEST_ACTIVITY_STATE, src->guest_activity_state);
> +	vmcs_write32(GUEST_SYSENTER_CS, src->guest_sysenter_cs);
> +
> +	vmcs_writel(GUEST_ES_BASE, src->guest_es_base);
> +	vmcs_writel(GUEST_CS_BASE, src->guest_cs_base);
> +	vmcs_writel(GUEST_SS_BASE, src->guest_ss_base);
> +	vmcs_writel(GUEST_DS_BASE, src->guest_ds_base);
> +	vmcs_writel(GUEST_FS_BASE, src->guest_fs_base);
> +	vmcs_writel(GUEST_GS_BASE, src->guest_gs_base);
> +	vmcs_writel(GUEST_LDTR_BASE, src->guest_ldtr_base);
> +	vmcs_writel(GUEST_TR_BASE, src->guest_tr_base);
> +	vmcs_writel(GUEST_GDTR_BASE, src->guest_gdtr_base);
> +	vmcs_writel(GUEST_IDTR_BASE, src->guest_idtr_base);
> +	vmcs_writel(GUEST_DR7, src->guest_dr7);
> +	vmcs_writel(GUEST_RSP, src->guest_rsp);
> +	vmcs_writel(GUEST_RIP, src->guest_rip);
> +	vmcs_writel(GUEST_RFLAGS, src->guest_rflags);
> +	vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
> +		    src->guest_pending_dbg_exceptions);
> +	vmcs_writel(GUEST_SYSENTER_ESP, src->guest_sysenter_esp);
> +	vmcs_writel(GUEST_SYSENTER_EIP, src->guest_sysenter_eip);
> +
> +	return 0;
> +}
>    

If we do it lazily, we'll only need to reload bits that have changed.

>   struct level_state *create_state(void)
>   {
>   	struct level_state *state = NULL;
> @@ -5176,6 +5615,685 @@ int create_l2_state(struct kvm_vcpu *vcpu)
>
>   	return 0;
>   }
> +int prepare_vmcs_02(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct shadow_vmcs *src = vmx->nested.l2_state->shadow_vmcs;
> +	u32 exec_control;
> +
> +	if (!src) {
> +		printk(KERN_INFO "%s: Error no shadow vmcs\n", __func__);
> +		return 1;
> +	}
> +
> +	load_vmcs_common(src);
> +
> +	if (cpu_has_vmx_vpid()&&  vmx->nested.l2_state->vpid != 0)
> +		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.l2_state->vpid);
> +
> +	if (vmx->nested.l2_state->io_bitmap_a)
> +		vmcs_write64(IO_BITMAP_A, vmx->nested.l2_state->io_bitmap_a);
> +
> +	if (vmx->nested.l2_state->io_bitmap_b)
> +		vmcs_write64(IO_BITMAP_B, vmx->nested.l2_state->io_bitmap_b);
> +
> +	if (vmx->nested.l2_state->msr_bitmap)
> +		vmcs_write64(MSR_BITMAP, vmx->nested.l2_state->msr_bitmap);
>    

Don't we need to combine the host and guest msr bitmaps and I/O 
bitmaps?  If the host doesn't allow an msr or I/O access to the guest, 
it shouldn't allow it to nested guests.

> +
> +	if (src->vm_entry_msr_load_count>  0) {
> +		struct page *page;
> +
> +		page = nested_get_page(vcpu,
> +				       src->vm_entry_msr_load_addr);
> +		if (!page)
> +			return 1;
> +
> +		vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, page_to_phys(page));
>    

Luckily we don't use the msr autoload stuff.  If we did we'd have to 
merge it too.  But We have to emulate those loads (via vmx_set_msr), the 
guest can easily load bad msrs which would kill the host.

> +	if (src->virtual_apic_page_addr != 0) {
> +		struct page *page;
> +
> +		page = nested_get_page(vcpu,
> +				       src->virtual_apic_page_addr);
> +		if (!page)
> +			return 1;
> +
> +		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, page_to_phys(page));
> +
> +		kvm_release_page_clean(page);
> +	}  else {
> +		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
> +			     src->virtual_apic_page_addr);
> +	}
>    

Don't understand the special zero value.

> +
> +	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
> +		     (vmx->nested.l1_state->shadow_vmcs->pin_based_vm_exec_control |
> +		      src->pin_based_vm_exec_control));
> +
> +	exec_control = vmx->nested.l1_state->shadow_vmcs->cpu_based_vm_exec_control;
> +
> +	exec_control&= ~CPU_BASED_VIRTUAL_INTR_PENDING;
> +
> +	exec_control&= ~CPU_BASED_VIRTUAL_NMI_PENDING;
> +
> +	exec_control&= ~CPU_BASED_TPR_SHADOW;
>    

Why?

> +	if (enable_vpid) {
> +		if (vmx->nested.l2_state->vpid == 0) {
> +			allocate_vpid(vmx);
> +			vmx->nested.l2_state->vpid = vmx->vpid;
>    

What if the guest has a nonzero vpid?

> +static int nested_vmx_vmexit(struct kvm_vcpu *vcpu,
> +			     bool is_interrupt)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	int initial_pfu_active = vcpu->fpu_active;
> +
> +	if (!vmx->nested.nested_mode) {
> +		printk(KERN_INFO "WARNING: %s called but not in nested mode\n",
> +		       __func__);
> +		return 0;
> +	}
> +
> +	save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
> +
> +	sync_cached_regs_to_vmcs(vcpu);
> +
> +	prepare_vmcs_12(vcpu);
> +	if (is_interrupt)
> +		vmx->nested.l2_state->shadow_vmcs->vm_exit_reason =
> +			EXIT_REASON_EXTERNAL_INTERRUPT;
>    

Need to auto-ack the interrupt if requested by the guest.



-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


  reply	other threads:[~2009-09-02 21:37 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-02 15:38 Nested VMX support - kernel v1 oritw
2009-09-02 15:38 ` [PATCH 1/6] Nested VMX patch 1 implements vmon and vmoff oritw
2009-09-02 15:38   ` [PATCH 2/6] Nested VMX patch 2 implements vmclear oritw
2009-09-02 15:38     ` [PATCH 3/6] Nested VMX patch 3 implements vmptrld and vmptrst oritw
2009-09-02 15:38       ` [PATCH 2/2] Nested VMX patch 4 implements vmread and vmwrite oritw
2009-09-02 15:38         ` [PATCH 5/6] Nested VMX patch 5 implements vmlaunch and vmresume oritw
2009-09-02 21:38           ` Avi Kivity [this message]
2009-09-03 14:53             ` Orit Wasserman
2009-09-06  9:29               ` Avi Kivity
2009-09-06 13:38                 ` Orit Wasserman
2009-09-02 20:15         ` [PATCH 2/2] Nested VMX patch 4 implements vmread and vmwrite Avi Kivity
2009-09-03 14:26           ` Orit Wasserman
2009-09-02 20:05       ` [PATCH 3/6] Nested VMX patch 3 implements vmptrld and vmptrst Avi Kivity
2009-09-03 14:25         ` Orit Wasserman
2009-09-06  9:25           ` Avi Kivity
2009-09-06 13:36             ` Orit Wasserman
2009-09-06 13:52               ` Avi Kivity
2009-09-06 16:55                 ` Orit Wasserman
2009-09-06 19:10                   ` Avi Kivity
2009-09-02 19:38     ` [PATCH 2/6] Nested VMX patch 2 implements vmclear Avi Kivity
2009-09-03 13:54       ` Orit Wasserman
2009-09-02 19:34   ` [PATCH 1/6] Nested VMX patch 1 implements vmon and vmoff Avi Kivity
2009-09-03 12:34     ` Orit Wasserman
2009-09-03 13:39       ` Avi Kivity
2009-09-03 14:54         ` Orit Wasserman
2009-09-02 15:57 ` Nested VMX support - kernel v1 Alexander Graf
2009-09-03  6:01   ` Muli Ben-Yehuda
2009-09-03  7:29     ` Alexander Graf
2009-09-03  9:53       ` Muli Ben-Yehuda
2009-09-06 19:28         ` Anthony Liguori
2009-09-02 21:39 ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4A9EE5C8.8040107@redhat.com \
    --to=avi@redhat.com \
    --cc=abelg@il.ibm.com \
    --cc=aliguori@us.ibm.com \
    --cc=benami@il.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=mmday@us.ibm.com \
    --cc=muli@il.ibm.com \
    --cc=oritw@il.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.