From: Paolo Bonzini <pbonzini@redhat.com>
To: Nadav Amit <namit@cs.technion.ac.il>, bsd@redhat.com, joro@8bytes.org
Cc: kvm@vger.kernel.org
Subject: Re: [PATCH v3] KVM: x86: INIT and reset sequences are different
Date: Mon, 13 Apr 2015 16:45:41 +0200 [thread overview]
Message-ID: <552BD695.4060408@redhat.com> (raw)
In-Reply-To: <1428924848-28212-1-git-send-email-namit@cs.technion.ac.il>
On 13/04/2015 13:34, Nadav Amit wrote:
> The x86 architecture defines differences between the reset and INIT sequences.
> INIT does not initialize the FPU (including MMX, XMM, YMM, etc.), TSC, PMU,
> MSRs (in general), MTRRs, machine-check registers, APIC ID, APIC arbitration ID
> or the BSP flag.
>
> References (from Intel SDM):
>
> "If the MP protocol has completed and a BSP is chosen, subsequent INITs (either
> to a specific processor or system wide) do not cause the MP protocol to be
> repeated." [8.4.2: MP Initialization Protocol Requirements and Restrictions]
>
> [Table 9-1. IA-32 Processor States Following Power-up, Reset, or INIT]
>
> "If the processor is reset by asserting the INIT# pin, the x87 FPU state is not
> changed." [9.2: X87 FPU INITIALIZATION]
>
> "The state of the local APIC following an INIT reset is the same as it is after
> a power-up or hardware reset, except that the APIC ID and arbitration ID
> registers are not affected." [10.4.7.3: Local APIC State After an INIT Reset
> (“Wait-for-SIPI” State)]
>
> Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
>
> ---
>
> v3:
>
> - Leave EFER unchanged on INIT. Instead, set CR0 so that vmx_set_cr0 sees the
> paging on-to-off transition and clears LMA itself.
> - Clean up the surrounding code by removing the unnecessary vmx->vcpu indirection.
> - Change svm similarly to vmx (UNTESTED).
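Just so it's spelled out in the archive, the resulting split looks roughly
like this (a condensed sketch with stub declarations, not the actual kernel
code; vcpu_create_path(), apic_init_path() and vendor_vcpu_reset() are only
stand-ins for the existing call sites and the vmx/svm hook):

/*
 * Condensed sketch, not the kernel code: only the init_event plumbing
 * is shown, everything else is stubbed out.
 */
#include <stdbool.h>

struct kvm_vcpu;

void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event);
void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void vendor_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); /* vmx/svm hook */

void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
        /* ... state that both power-up/RESET and INIT clear ... */

        if (!init_event)                     /* INIT leaves the PMU alone */
                kvm_pmu_reset(vcpu);

        /* skips EFER/SYSENTER/DEBUGCTL/APIC-base setup when init_event */
        vendor_vcpu_reset(vcpu, init_event);
}

/* vCPU creation, i.e. power-up/RESET: everything is (re)initialized. */
void vcpu_create_path(struct kvm_vcpu *vcpu)
{
        kvm_lapic_reset(vcpu, false);        /* sets the APIC ID */
        kvm_vcpu_reset(vcpu, false);
}

/* INIT delivered through the local APIC: preserved state is skipped. */
void apic_init_path(struct kvm_vcpu *vcpu)
{
        kvm_lapic_reset(vcpu, true);         /* APIC ID / arbitration ID kept */
        kvm_vcpu_reset(vcpu, true);          /* FPU, PMU, MSRs in general kept */
}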
Thanks, applied (locally) for 4.2.  It will take a few weeks before it
gets to kvm/next; in the meantime I'll make sure to test on SVM as
well and to integrate the kvm-unit-tests patches.
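
Roughly what I have in mind for the guest side is something like this
(untested sketch, not the final test: report_check() and the AP bring-up are
placeholders for whatever the kvm-unit-tests harness provides, and the BSP is
expected to send INIT followed by a SIPI back to the same start vector):

/* Untested sketch.  The point: an MSR such as IA32_SYSENTER_CS must
 * survive INIT, since only RESET (re)initializes MSRs in general. */
#include <stdint.h>

#define MSR_IA32_SYSENTER_CS    0x174
#define SENTINEL                0xbeefULL

static inline uint64_t rdmsr(uint32_t idx)
{
        uint32_t lo, hi;
        asm volatile("rdmsr" : "=a"(lo), "=d"(hi) : "c"(idx));
        return lo | ((uint64_t)hi << 32);
}

static inline void wrmsr(uint32_t idx, uint64_t val)
{
        asm volatile("wrmsr" : : "c"(idx), "a"((uint32_t)val),
                     "d"((uint32_t)(val >> 32)));
}

/* Placeholder for the harness's reporting helper. */
void report_check(int pass, const char *msg);

/* AP entry point; entered a second time after the BSP sends INIT+SIPI.
 * Memory is untouched by INIT, so the static flag survives. */
void ap_main(void)
{
        static int booted;

        if (!booted) {
                booted = 1;
                /* First boot: plant a value that INIT must preserve. */
                wrmsr(MSR_IA32_SYSENTER_CS, SENTINEL);
        } else {
                report_check(rdmsr(MSR_IA32_SYSENTER_CS) == SENTINEL,
                             "SYSENTER_CS preserved across INIT");
        }
}

The same pattern extends naturally to DEBUGCTL and the x87/XMM state.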
Paolo
> v2:
>
> - Same as v1 (v1 was part of a patch set that was reworked because tests were missing)
> ---
> arch/x86/include/asm/kvm_host.h | 6 ++---
> arch/x86/kvm/lapic.c | 11 +++++----
> arch/x86/kvm/lapic.h | 2 +-
> arch/x86/kvm/svm.c | 27 +++++++++++-----------
> arch/x86/kvm/vmx.c | 51 +++++++++++++++++++++++------------------
> arch/x86/kvm/x86.c | 17 ++++++++------
> 6 files changed, 63 insertions(+), 51 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index f80ad59..3a19e30 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -711,7 +711,7 @@ struct kvm_x86_ops {
> /* Create, but do not attach this VCPU */
> struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
> void (*vcpu_free)(struct kvm_vcpu *vcpu);
> - void (*vcpu_reset)(struct kvm_vcpu *vcpu);
> + void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
>
> void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
> void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
> @@ -1001,7 +1001,7 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
>
> void kvm_inject_nmi(struct kvm_vcpu *vcpu);
>
> -int fx_init(struct kvm_vcpu *vcpu);
> +int fx_init(struct kvm_vcpu *vcpu, bool init_event);
>
> void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
> const u8 *new, int bytes);
> @@ -1145,7 +1145,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
> int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
> int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
> int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
> -void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
> +void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
> void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
> void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
> unsigned long address);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index fe2d89e..a91fb2f 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1557,7 +1557,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
>
> }
>
> -void kvm_lapic_reset(struct kvm_vcpu *vcpu)
> +void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
> {
> struct kvm_lapic *apic;
> int i;
> @@ -1571,7 +1571,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
> /* Stop the timer in case it's a reset to an active apic */
> hrtimer_cancel(&apic->lapic_timer.timer);
>
> - kvm_apic_set_id(apic, vcpu->vcpu_id);
> + if (!init_event)
> + kvm_apic_set_id(apic, vcpu->vcpu_id);
> kvm_apic_set_version(apic->vcpu);
>
> for (i = 0; i < APIC_LVT_NUM; i++)
> @@ -1713,7 +1714,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
> APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
>
> static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
> - kvm_lapic_reset(vcpu);
> + kvm_lapic_reset(vcpu, false);
> kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
>
> return 0;
> @@ -2047,8 +2048,8 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
> pe = xchg(&apic->pending_events, 0);
>
> if (test_bit(KVM_APIC_INIT, &pe)) {
> - kvm_lapic_reset(vcpu);
> - kvm_vcpu_reset(vcpu);
> + kvm_lapic_reset(vcpu, true);
> + kvm_vcpu_reset(vcpu, true);
> if (kvm_vcpu_is_bsp(apic->vcpu))
> vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> else
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 9d28383..793f761 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -48,7 +48,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
> int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
> int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
> void kvm_apic_accept_events(struct kvm_vcpu *vcpu);
> -void kvm_lapic_reset(struct kvm_vcpu *vcpu);
> +void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event);
> u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
> void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
> void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 46299da..b6ad0f9 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1082,7 +1082,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
> return target_tsc - tsc;
> }
>
> -static void init_vmcb(struct vcpu_svm *svm)
> +static void init_vmcb(struct vcpu_svm *svm, bool init_event)
> {
> struct vmcb_control_area *control = &svm->vmcb->control;
> struct vmcb_save_area *save = &svm->vmcb->save;
> @@ -1153,17 +1153,17 @@ static void init_vmcb(struct vcpu_svm *svm)
> init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
> init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
>
> - svm_set_efer(&svm->vcpu, 0);
> + if (!init_event)
> + svm_set_efer(&svm->vcpu, 0);
> save->dr6 = 0xffff0ff0;
> kvm_set_rflags(&svm->vcpu, 2);
> save->rip = 0x0000fff0;
> svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
>
> /*
> - * This is the guest-visible cr0 value.
> * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
> + * It also updates the guest-visible cr0 value.
> */
> - svm->vcpu.arch.cr0 = 0;
> (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
>
> save->cr4 = X86_CR4_PAE;
> @@ -1195,13 +1195,19 @@ static void init_vmcb(struct vcpu_svm *svm)
> enable_gif(svm);
> }
>
> -static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
> +static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> u32 dummy;
> u32 eax = 1;
>
> - init_vmcb(svm);
> + if (!init_event) {
> + svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
> + MSR_IA32_APICBASE_ENABLE;
> + if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
> + svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
> + }
> + init_vmcb(svm, init_event);
>
> kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
> kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
> @@ -1257,12 +1263,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
> clear_page(svm->vmcb);
> svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
> svm->asid_generation = 0;
> - init_vmcb(svm);
> -
> - svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
> - MSR_IA32_APICBASE_ENABLE;
> - if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
> - svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
> + init_vmcb(svm, false);
>
> svm_init_osvw(&svm->vcpu);
>
> @@ -1884,7 +1885,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
> * so reinitialize it.
> */
> clear_page(svm->vmcb);
> - init_vmcb(svm);
> + init_vmcb(svm, false);
>
> kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
> return 0;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 8c14d6a..f7a0a7f 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4694,22 +4694,27 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
> return 0;
> }
>
> -static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
> +static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> struct msr_data apic_base_msr;
> + u64 cr0;
>
> vmx->rmode.vm86_active = 0;
>
> vmx->soft_vnmi_blocked = 0;
>
> vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
> - kvm_set_cr8(&vmx->vcpu, 0);
> - apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
> - if (kvm_vcpu_is_reset_bsp(&vmx->vcpu))
> - apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
> - apic_base_msr.host_initiated = true;
> - kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
> + kvm_set_cr8(vcpu, 0);
> +
> + if (!init_event) {
> + apic_base_msr.data = APIC_DEFAULT_PHYS_BASE |
> + MSR_IA32_APICBASE_ENABLE;
> + if (kvm_vcpu_is_reset_bsp(vcpu))
> + apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
> + apic_base_msr.host_initiated = true;
> + kvm_set_apic_base(vcpu, &apic_base_msr);
> + }
>
> vmx_segment_cache_clear(vmx);
>
> @@ -4733,9 +4738,12 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
> vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
> vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
>
> - vmcs_write32(GUEST_SYSENTER_CS, 0);
> - vmcs_writel(GUEST_SYSENTER_ESP, 0);
> - vmcs_writel(GUEST_SYSENTER_EIP, 0);
> + if (!init_event) {
> + vmcs_write32(GUEST_SYSENTER_CS, 0);
> + vmcs_writel(GUEST_SYSENTER_ESP, 0);
> + vmcs_writel(GUEST_SYSENTER_EIP, 0);
> + vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> + }
>
> vmcs_writel(GUEST_RFLAGS, 0x02);
> kvm_rip_write(vcpu, 0xfff0);
> @@ -4750,18 +4758,15 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
> vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
> vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
>
> - /* Special registers */
> - vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
> -
> setup_msrs(vmx);
>
> vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
>
> - if (cpu_has_vmx_tpr_shadow()) {
> + if (cpu_has_vmx_tpr_shadow() && !init_event) {
> vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
> - if (vm_need_tpr_shadow(vmx->vcpu.kvm))
> + if (vm_need_tpr_shadow(vcpu->kvm))
> vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
> - __pa(vmx->vcpu.arch.apic->regs));
> + __pa(vcpu->arch.apic->regs));
> vmcs_write32(TPR_THRESHOLD, 0);
> }
>
> @@ -4773,12 +4778,14 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
> if (vmx->vpid != 0)
> vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
>
> - vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
> - vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
> - vmx_set_cr4(&vmx->vcpu, 0);
> - vmx_set_efer(&vmx->vcpu, 0);
> - vmx_fpu_activate(&vmx->vcpu);
> - update_exception_bitmap(&vmx->vcpu);
> + cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
> + vmx_set_cr0(vcpu, cr0); /* enter rmode */
> + vmx->vcpu.arch.cr0 = cr0;
> + vmx_set_cr4(vcpu, 0);
> + if (!init_event)
> + vmx_set_efer(vcpu, 0);
> + vmx_fpu_activate(vcpu);
> + update_exception_bitmap(vcpu);
>
> vpid_sync_context(vmx);
> }
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c3859a6..ea3584b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7011,7 +7011,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> return 0;
> }
>
> -int fx_init(struct kvm_vcpu *vcpu)
> +int fx_init(struct kvm_vcpu *vcpu, bool init_event)
> {
> int err;
>
> @@ -7019,7 +7019,9 @@ int fx_init(struct kvm_vcpu *vcpu)
> if (err)
> return err;
>
> - fpu_finit(&vcpu->arch.guest_fpu);
> + if (!init_event)
> + fpu_finit(&vcpu->arch.guest_fpu);
> +
> if (cpu_has_xsaves)
> vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
> host_xcr0 | XSTATE_COMPACTION_ENABLED;
> @@ -7099,7 +7101,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> r = vcpu_load(vcpu);
> if (r)
> return r;
> - kvm_vcpu_reset(vcpu);
> + kvm_vcpu_reset(vcpu, false);
> kvm_mmu_setup(vcpu);
> vcpu_put(vcpu);
>
> @@ -7137,7 +7139,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
> kvm_x86_ops->vcpu_free(vcpu);
> }
>
> -void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
> +void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> {
> atomic_set(&vcpu->arch.nmi_queued, 0);
> vcpu->arch.nmi_pending = 0;
> @@ -7164,13 +7166,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
> kvm_async_pf_hash_reset(vcpu);
> vcpu->arch.apf.halted = false;
>
> - kvm_pmu_reset(vcpu);
> + if (!init_event)
> + kvm_pmu_reset(vcpu);
>
> memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
> vcpu->arch.regs_avail = ~0;
> vcpu->arch.regs_dirty = ~0;
>
> - kvm_x86_ops->vcpu_reset(vcpu);
> + kvm_x86_ops->vcpu_reset(vcpu, init_event);
> }
>
> void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
> @@ -7352,7 +7355,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> goto fail_free_mce_banks;
> }
>
> - r = fx_init(vcpu);
> + r = fx_init(vcpu, false);
> if (r)
> goto fail_free_wbinvd_dirty_mask;
>
>
Thread overview:
2015-04-13 11:34 [PATCH v3] KVM: x86: INIT and reset sequences are different Nadav Amit
2015-04-13 14:45 ` Paolo Bonzini [this message]
2015-10-01 12:55 ` Paolo Bonzini
2015-10-09 8:06 ` Zhang, Yang Z