From: Paolo Bonzini
Subject: Re: [PATCH v3] KVM: x86: INIT and reset sequences are different
Date: Mon, 13 Apr 2015 16:45:41 +0200
Message-ID: <552BD695.4060408@redhat.com>
In-Reply-To: <1428924848-28212-1-git-send-email-namit@cs.technion.ac.il>
References: <1428924848-28212-1-git-send-email-namit@cs.technion.ac.il>
To: Nadav Amit, bsd@redhat.com, joro@8bytes.org
Cc: kvm@vger.kernel.org

On 13/04/2015 13:34, Nadav Amit wrote:
> The x86 architecture defines differences between the reset and INIT sequences.
> INIT does not initialize the FPU (including MMX, XMM, YMM, etc.), TSC, PMU,
> MSRs (in general), MTRRs, machine-check, APIC ID, APIC arbitration ID and BSP.
>
> References (from Intel SDM):
>
> "If the MP protocol has completed and a BSP is chosen, subsequent INITs (either
> to a specific processor or system wide) do not cause the MP protocol to be
> repeated." [8.4.2: MP Initialization Protocol Requirements and Restrictions]
>
> [Table 9-1. IA-32 Processor States Following Power-up, Reset, or INIT]
>
> "If the processor is reset by asserting the INIT# pin, the x87 FPU state is not
> changed." [9.2: X87 FPU INITIALIZATION]
>
> "The state of the local APIC following an INIT reset is the same as it is after
> a power-up or hardware reset, except that the APIC ID and arbitration ID
> registers are not affected." [10.4.7.3: Local APIC State After an INIT Reset
> (“Wait-for-SIPI” State)]
>
> Signed-off-by: Nadav Amit
>
> ---
>
> v3:
>
> - Leave EFER unchanged on INIT. Instead, set cr0 correctly so vmx_set_cr0
>   recognizes that paging was changed from on to off and clears LMA.
> - Clean up the surrounding code, removing unnecessary indirection through
>   vmx->vcpu.
> - Change svm similarly to vmx (UNTESTED).

Thanks, applied (locally) for 4.2.  It will take a few weeks before it
gets to kvm/next; in the meanwhile I'll make sure to test on SVM as well
and to integrate the kvm-unit-tests patches.
Paolo

> v2:
>
> - Same as v1 (was part of a patch-set that was modified due to missing tests)
> ---
>  arch/x86/include/asm/kvm_host.h |  6 +++---
>  arch/x86/kvm/lapic.c            | 11 ++++++-----
>  arch/x86/kvm/lapic.h            |  2 +-
>  arch/x86/kvm/svm.c              | 27 ++++++++++++++-------------
>  arch/x86/kvm/vmx.c              | 51 +++++++++++++++++++++++++++++----------------------
>  arch/x86/kvm/x86.c              | 17 ++++++++++-------
>  6 files changed, 63 insertions(+), 51 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index f80ad59..3a19e30 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -711,7 +711,7 @@ struct kvm_x86_ops {
>          /* Create, but do not attach this VCPU */
>          struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
>          void (*vcpu_free)(struct kvm_vcpu *vcpu);
> -        void (*vcpu_reset)(struct kvm_vcpu *vcpu);
> +        void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
>
>          void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
>          void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
> @@ -1001,7 +1001,7 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
>
>  void kvm_inject_nmi(struct kvm_vcpu *vcpu);
>
> -int fx_init(struct kvm_vcpu *vcpu);
> +int fx_init(struct kvm_vcpu *vcpu, bool init_event);
>
>  void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
>                         const u8 *new, int bytes);
> @@ -1145,7 +1145,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
>  int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
>  int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
>  int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
> -void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
> +void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
>  void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
>  void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
>                                             unsigned long address);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index fe2d89e..a91fb2f 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1557,7 +1557,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
>
>  }
>
> -void kvm_lapic_reset(struct kvm_vcpu *vcpu)
> +void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
>  {
>          struct kvm_lapic *apic;
>          int i;
> @@ -1571,7 +1571,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
>          /* Stop the timer in case it's a reset to an active apic */
>          hrtimer_cancel(&apic->lapic_timer.timer);
>
> -        kvm_apic_set_id(apic, vcpu->vcpu_id);
> +        if (!init_event)
> +                kvm_apic_set_id(apic, vcpu->vcpu_id);
>          kvm_apic_set_version(apic->vcpu);
>
>          for (i = 0; i < APIC_LVT_NUM; i++)
> @@ -1713,7 +1714,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
>                          APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
>
>          static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
> -        kvm_lapic_reset(vcpu);
> +        kvm_lapic_reset(vcpu, false);
>          kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
>
>          return 0;
> @@ -2047,8 +2048,8 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
>          pe = xchg(&apic->pending_events, 0);
>
>          if (test_bit(KVM_APIC_INIT, &pe)) {
> -                kvm_lapic_reset(vcpu);
> -                kvm_vcpu_reset(vcpu);
> +                kvm_lapic_reset(vcpu, true);
> +                kvm_vcpu_reset(vcpu, true);
>                  if (kvm_vcpu_is_bsp(apic->vcpu))
>                          vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
>                  else
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 9d28383..793f761 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -48,7 +48,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
>  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
>  int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
>  void kvm_apic_accept_events(struct kvm_vcpu *vcpu);
> -void kvm_lapic_reset(struct kvm_vcpu *vcpu);
> +void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event);
>  u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
>  void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
>  void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 46299da..b6ad0f9 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1082,7 +1082,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
>          return target_tsc - tsc;
>  }
>
> -static void init_vmcb(struct vcpu_svm *svm)
> +static void init_vmcb(struct vcpu_svm *svm, bool init_event)
>  {
>          struct vmcb_control_area *control = &svm->vmcb->control;
>          struct vmcb_save_area *save = &svm->vmcb->save;
> @@ -1153,17 +1153,17 @@ static void init_vmcb(struct vcpu_svm *svm)
>          init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
>          init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
>
> -        svm_set_efer(&svm->vcpu, 0);
> +        if (!init_event)
> +                svm_set_efer(&svm->vcpu, 0);
>          save->dr6 = 0xffff0ff0;
>          kvm_set_rflags(&svm->vcpu, 2);
>          save->rip = 0x0000fff0;
>          svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
>
>          /*
> -         * This is the guest-visible cr0 value.
>           * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
> +         * It also updates the guest-visible cr0 value.
>           */
> -        svm->vcpu.arch.cr0 = 0;
>          (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
>
>          save->cr4 = X86_CR4_PAE;
> @@ -1195,13 +1195,19 @@ static void init_vmcb(struct vcpu_svm *svm)
>          enable_gif(svm);
>  }
>
> -static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
> +static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  {
>          struct vcpu_svm *svm = to_svm(vcpu);
>          u32 dummy;
>          u32 eax = 1;
>
> -        init_vmcb(svm);
> +        if (!init_event) {
> +                svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
> +                                           MSR_IA32_APICBASE_ENABLE;
> +                if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
> +                        svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
> +        }
> +        init_vmcb(svm, init_event);
>
>          kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
>          kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
> @@ -1257,12 +1263,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
>          clear_page(svm->vmcb);
>          svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
>          svm->asid_generation = 0;
> -        init_vmcb(svm);
> -
> -        svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
> -                                   MSR_IA32_APICBASE_ENABLE;
> -        if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
> -                svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
> +        init_vmcb(svm, false);
>
>          svm_init_osvw(&svm->vcpu);
>
> @@ -1884,7 +1885,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
>           * so reinitialize it.
>           */
>          clear_page(svm->vmcb);
> -        init_vmcb(svm);
> +        init_vmcb(svm, false);
>
>          kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
>          return 0;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 8c14d6a..f7a0a7f 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4694,22 +4694,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>          return 0;
>  }
>
> -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
> +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  {
>          struct vcpu_vmx *vmx = to_vmx(vcpu);
>          struct msr_data apic_base_msr;
> +        u64 cr0;
>
>          vmx->rmode.vm86_active = 0;
>
>          vmx->soft_vnmi_blocked = 0;
>
>          vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
> -        kvm_set_cr8(&vmx->vcpu, 0);
> -        apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
> -        if (kvm_vcpu_is_reset_bsp(&vmx->vcpu))
> -                apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
> -        apic_base_msr.host_initiated = true;
> -        kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
> +        kvm_set_cr8(vcpu, 0);
> +
> +        if (!init_event) {
> +                apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
> +                                     MSR_IA32_APICBASE_ENABLE;
> +                if (kvm_vcpu_is_reset_bsp(vcpu))
> +                        apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
> +                apic_base_msr.host_initiated = true;
> +                kvm_set_apic_base(vcpu, &apic_base_msr);
> +        }
>
>          vmx_segment_cache_clear(vmx);
>
> @@ -4733,9 +4738,12 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
>          vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
>          vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
>
> -        vmcs_write32(GUEST_SYSENTER_CS, 0);
> -        vmcs_writel(GUEST_SYSENTER_ESP, 0);
> -        vmcs_writel(GUEST_SYSENTER_EIP, 0);
> +        if (!init_event) {
> +                vmcs_write32(GUEST_SYSENTER_CS, 0);
> +                vmcs_writel(GUEST_SYSENTER_ESP, 0);
> +                vmcs_writel(GUEST_SYSENTER_EIP, 0);
> +                vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> +        }
>
>          vmcs_writel(GUEST_RFLAGS, 0x02);
>          kvm_rip_write(vcpu, 0xfff0);
> @@ -4750,18 +4758,15 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
>          vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
>          vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
>
> -        /* Special registers */
> -        vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> -
>          setup_msrs(vmx);
>
>          vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
>
> -        if (cpu_has_vmx_tpr_shadow()) {
> +        if (cpu_has_vmx_tpr_shadow() && !init_event) {
>                  vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
> -                if (vm_need_tpr_shadow(vmx->vcpu.kvm))
> +                if (vm_need_tpr_shadow(vcpu->kvm))
>                          vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
> -                                     __pa(vmx->vcpu.arch.apic->regs));
> +                                     __pa(vcpu->arch.apic->regs));
>                  vmcs_write32(TPR_THRESHOLD, 0);
>          }
>
> @@ -4773,12 +4778,14 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
>          if (vmx->vpid != 0)
>                  vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
>
> -        vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
> -        vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
> -        vmx_set_cr4(&vmx->vcpu, 0);
> -        vmx_set_efer(&vmx->vcpu, 0);
> -        vmx_fpu_activate(&vmx->vcpu);
> -        update_exception_bitmap(&vmx->vcpu);
> +        cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
> +        vmx_set_cr0(vcpu, cr0); /* enter rmode */
> +        vmx->vcpu.arch.cr0 = cr0;
> +        vmx_set_cr4(vcpu, 0);
> +        if (!init_event)
> +                vmx_set_efer(vcpu, 0);
> +        vmx_fpu_activate(vcpu);
> +        update_exception_bitmap(vcpu);
>
>          vpid_sync_context(vmx);
>  }
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c3859a6..ea3584b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7011,7 +7011,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>          return 0;
>  }
>
> -int fx_init(struct kvm_vcpu *vcpu)
> +int fx_init(struct kvm_vcpu *vcpu, bool init_event)
>  {
>          int err;
>
> @@ -7019,7 +7019,9 @@ int fx_init(struct kvm_vcpu *vcpu)
>          if (err)
>                  return err;
>
> -        fpu_finit(&vcpu->arch.guest_fpu);
> +        if (!init_event)
> +                fpu_finit(&vcpu->arch.guest_fpu);
> +
>          if (cpu_has_xsaves)
>                  vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
>                          host_xcr0 | XSTATE_COMPACTION_ENABLED;
> @@ -7099,7 +7101,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>          r = vcpu_load(vcpu);
>          if (r)
>                  return r;
> -        kvm_vcpu_reset(vcpu);
> +        kvm_vcpu_reset(vcpu, false);
>          kvm_mmu_setup(vcpu);
>          vcpu_put(vcpu);
>
> @@ -7137,7 +7139,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>          kvm_x86_ops->vcpu_free(vcpu);
>  }
>
> -void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
> +void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  {
>          atomic_set(&vcpu->arch.nmi_queued, 0);
>          vcpu->arch.nmi_pending = 0;
> @@ -7164,13 +7166,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
>          kvm_async_pf_hash_reset(vcpu);
>          vcpu->arch.apf.halted = false;
>
> -        kvm_pmu_reset(vcpu);
> +        if (!init_event)
> +                kvm_pmu_reset(vcpu);
>
>          memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
>          vcpu->arch.regs_avail = ~0;
>          vcpu->arch.regs_dirty = ~0;
>
> -        kvm_x86_ops->vcpu_reset(vcpu);
> +        kvm_x86_ops->vcpu_reset(vcpu, init_event);
>  }
>
>  void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
> @@ -7352,7 +7355,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>                  goto fail_free_mce_banks;
>          }
>
> -        r = fx_init(vcpu);
> +        r = fx_init(vcpu, false);
>          if (r)
>                  goto fail_free_wbinvd_dirty_mask;
>
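
A minimal, self-contained sketch of the pattern the patch applies (toy
code, not KVM's actual structures or API; all names below are
hypothetical): a single reset routine takes a "bool init_event" flag,
and state that the SDM says survives INIT -- FPU, TSC, PMU, MSRs, APIC
ID, BSP flag -- is touched only when the flag is false, i.e. on
power-up/RESET.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical vCPU: a few fields standing in for the real state. */
struct toy_vcpu {
        uint64_t regs[16];      /* general-purpose registers */
        uint64_t fpu_state;     /* stands in for the whole x87/SSE state */
        uint64_t tsc;
        uint32_t apic_id;
        bool     bsp;           /* analogue of MSR_IA32_APICBASE.BSP */
};

static void toy_vcpu_reset(struct toy_vcpu *vcpu, bool init_event)
{
        /* GPRs are cleared by both RESET and INIT. */
        memset(vcpu->regs, 0, sizeof(vcpu->regs));

        if (!init_event) {
                /* Power-up/RESET only: INIT leaves all of this alone. */
                vcpu->fpu_state = 0;
                vcpu->tsc = 0;
                vcpu->apic_id = 0;
                vcpu->bsp = true;
        }
}

int main(void)
{
        struct toy_vcpu vcpu = { .fpu_state = 0xdead, .tsc = 42,
                                 .apic_id = 3, .bsp = false };

        toy_vcpu_reset(&vcpu, true);    /* INIT: tsc/apic_id preserved */
        printf("after INIT:  tsc=%llu apic_id=%u\n",
               (unsigned long long)vcpu.tsc, vcpu.apic_id);

        toy_vcpu_reset(&vcpu, false);   /* power-up/RESET: cleared */
        printf("after RESET: tsc=%llu apic_id=%u\n",
               (unsigned long long)vcpu.tsc, vcpu.apic_id);
        return 0;
}

Running it prints tsc=42/apic_id=3 after the INIT-style call and
tsc=0/apic_id=0 after the RESET-style call, mirroring how the patch
threads init_event from kvm_vcpu_reset() down through
kvm_x86_ops->vcpu_reset() into the vmx/svm implementations.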