* [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
@ 2013-09-06 2:04 Arthur Chunqi Li
2013-09-13 17:15 ` Paolo Bonzini
2013-09-15 12:31 ` Gleb Natapov
0 siblings, 2 replies; 15+ messages in thread
From: Arthur Chunqi Li @ 2013-09-06 2:04 UTC (permalink / raw)
To: kvm; +Cc: jan.kiszka, gleb, pbonzini, yang.z.zhang, Arthur Chunqi Li
This patch contains the following two changes:
1. Fix the bug in nested preemption timer support. If vmexit L2->L0
occurs with some reason not emulated by L1, the preemption timer value
should be saved on such exits.
2. Add support of "Save VMX-preemption timer value" VM-Exit controls
to nVMX.
With this patch, nested VMX preemption timer features are fully
supported.
Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
---
ChangeLog to v3:
Move nested_adjust_preemption_timer to the latest place just before vmenter.
Some minor changes.
arch/x86/include/uapi/asm/msr-index.h | 1 +
arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++--
2 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index bb04650..b93e09a 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -536,6 +536,7 @@
/* MSR_IA32_VMX_MISC bits */
#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
/* AMD-V MSRs */
#define MSR_VM_CR 0xc0010114
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1f1da43..f364d16 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -374,6 +374,8 @@ struct nested_vmx {
*/
struct page *apic_access_page;
u64 msr_ia32_feature_control;
+ /* Set if vmexit is L2->L1 */
+ bool nested_vmx_exit;
};
#define POSTED_INTR_ON 0
@@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
#ifdef CONFIG_X86_64
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
- VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
+ VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+ if (!(nested_vmx_pinbased_ctls_high &
+ PIN_BASED_VMX_PREEMPTION_TIMER) ||
+ !(nested_vmx_exit_ctls_high &
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
+ nested_vmx_exit_ctls_high &=
+ (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
+ nested_vmx_pinbased_ctls_high &=
+ (~PIN_BASED_VMX_PREEMPTION_TIMER);
+ }
nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
VM_EXIT_LOAD_IA32_EFER);
@@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}
+static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
+{
+ u64 delta_tsc_l1;
+ u32 preempt_val_l1, preempt_val_l2, preempt_scale;
+
+ preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
+ MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
+ preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+ delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu,
+ native_read_tsc()) - vcpu->arch.last_guest_tsc;
+ preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
+ if (preempt_val_l2 <= preempt_val_l1)
+ preempt_val_l2 = 0;
+ else
+ preempt_val_l2 -= preempt_val_l1;
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
+}
+
/*
* The guest has exited. See if we can fix it or if we need userspace
* assistance.
@@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
vmx->nested.nested_run_pending = 0;
if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
+ vmx->nested.nested_vmx_exit = true;
nested_vmx_vmexit(vcpu);
return 1;
}
+ vmx->nested.nested_vmx_exit = false;
if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
debugctlmsr = get_debugctlmsr();
vmx->__launched = vmx->loaded_vmcs->launched;
+ if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
+ nested_adjust_preemption_timer(vcpu);
asm(
/* Store host registers */
"push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -7518,6 +7552,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
+ u32 exit_control;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -7691,7 +7726,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
* we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
* bits are further modified by vmx_set_efer() below.
*/
- vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+ exit_control = vmcs_config.vmexit_ctrl;
+ if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
+ exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
+ vmcs_write32(VM_EXIT_CONTROLS, exit_control);
/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
* emulated by vmx_set_efer(), below.
@@ -8090,6 +8128,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_pending_dbg_exceptions =
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+ if ((vmcs12->pin_based_vm_exec_control &
+ PIN_BASED_VMX_PREEMPTION_TIMER) &&
+ (vmcs12->vm_exit_controls &
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+ vmcs12->vmx_preemption_timer_value =
+ vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
+
/*
* In some cases (usually, nested EPT), L2 is allowed to change its
* own CR3 without exiting. If it has changed it, we must keep it.
--
1.7.9.5
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-06 2:04 [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer Arthur Chunqi Li
@ 2013-09-13 17:15 ` Paolo Bonzini
2013-09-14 7:44 ` Jan Kiszka
` (2 more replies)
2013-09-15 12:31 ` Gleb Natapov
1 sibling, 3 replies; 15+ messages in thread
From: Paolo Bonzini @ 2013-09-13 17:15 UTC (permalink / raw)
To: Arthur Chunqi Li; +Cc: kvm, jan.kiszka, gleb, yang.z.zhang
Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto:
> This patch contains the following two changes:
> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0
> with some reasons not emulated by L1, preemption timer value should
> be save in such exits.
> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls
> to nVMX.
>
> With this patch, nested VMX preemption timer features are fully
> supported.
>
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
> ChangeLog to v3:
> Move nested_adjust_preemption_timer to the latest place just before vmenter.
> Some minor changes.
>
> arch/x86/include/uapi/asm/msr-index.h | 1 +
> arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++--
> 2 files changed, 48 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
> index bb04650..b93e09a 100644
> --- a/arch/x86/include/uapi/asm/msr-index.h
> +++ b/arch/x86/include/uapi/asm/msr-index.h
> @@ -536,6 +536,7 @@
>
> /* MSR_IA32_VMX_MISC bits */
> #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
> /* AMD-V MSRs */
>
> #define MSR_VM_CR 0xc0010114
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 1f1da43..f364d16 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -374,6 +374,8 @@ struct nested_vmx {
> */
> struct page *apic_access_page;
> u64 msr_ia32_feature_control;
> + /* Set if vmexit is L2->L1 */
> + bool nested_vmx_exit;
> };
>
> #define POSTED_INTR_ON 0
> @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> #ifdef CONFIG_X86_64
> VM_EXIT_HOST_ADDR_SPACE_SIZE |
> #endif
> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> + if (!(nested_vmx_pinbased_ctls_high &
> + PIN_BASED_VMX_PREEMPTION_TIMER) ||
> + !(nested_vmx_exit_ctls_high &
> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
Align this under the other "!". Also, I prefer to have one long line
for the whole "!(... & ...) ||" (and likewise below), but I don't know
if Gleb agrees
> + nested_vmx_exit_ctls_high &=
> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
Please remove parentheses around ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, and
likewise elsewhere in the patch.
> + nested_vmx_pinbased_ctls_high &=
> + (~PIN_BASED_VMX_PREEMPTION_TIMER);
> + }
> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> VM_EXIT_LOAD_IA32_EFER);
>
> @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
> *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
> }
>
> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
> +{
> + u64 delta_tsc_l1;
> + u32 preempt_val_l1, preempt_val_l2, preempt_scale;
Should this exit immediately if the preemption timer pin-based control
is disabled?
> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
> + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
> + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu,
> + native_read_tsc()) - vcpu->arch.last_guest_tsc;
Please format this like:
delta_tsc_l1 =
kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc())
- vcpu->arch.last_guest_tsc;
> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
> + if (preempt_val_l2 <= preempt_val_l1)
> + preempt_val_l2 = 0;
> + else
> + preempt_val_l2 -= preempt_val_l1;
> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
Did you test that a value of 0 triggers an immediate exit, rather than
counting down by 2^32? Perhaps it's safer to limit the value to 1
instead of 0.
> +}
> +
> /*
> * The guest has exited. See if we can fix it or if we need userspace
> * assistance.
> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> vmx->nested.nested_run_pending = 0;
>
> if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
> + vmx->nested.nested_vmx_exit = true;
I think this assignment should be in nested_vmx_vmexit, since it is
called from other places as well.
> nested_vmx_vmexit(vcpu);
> return 1;
> }
> + vmx->nested.nested_vmx_exit = false;
>
> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
> vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> debugctlmsr = get_debugctlmsr();
>
> vmx->__launched = vmx->loaded_vmcs->launched;
> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
> + nested_adjust_preemption_timer(vcpu);
Please leave the assignment to __launched last, since it's already
initializing the asm below.
I don't like the is_guest_mode check here... Maybe it's
micro-optimizing, but I wonder if we already do too many checks in
vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
Alternatively, we could change nested_vmx_exit to an enum in struct
vcpu_vmx (with values for L0->L1, L0->L2, L1->L2) that is initialized in
vmx_handle_exit. Then we could check directly for L0->L2 and not adjust
the preemption timer in other cases. In fact, I suspect this enum could
replace HF_GUEST_MASK altogether. However, this would require some
other, more complicated, changes to svm.c.
Gleb, what do you think?
Paolo
> asm(
> /* Store host registers */
> "push %%" _ASM_DX "; push %%" _ASM_BP ";"
> @@ -7518,6 +7552,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> u32 exec_control;
> + u32 exit_control;
>
> vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> @@ -7691,7 +7726,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
> * bits are further modified by vmx_set_efer() below.
> */
> - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> + exit_control = vmcs_config.vmexit_ctrl;
> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
> + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> + vmcs_write32(VM_EXIT_CONTROLS, exit_control);
>
> /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
> * emulated by vmx_set_efer(), below.
> @@ -8090,6 +8128,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> vmcs12->guest_pending_dbg_exceptions =
> vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>
> + if ((vmcs12->pin_based_vm_exec_control &
> + PIN_BASED_VMX_PREEMPTION_TIMER) &&
> + (vmcs12->vm_exit_controls &
> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
> + vmcs12->vmx_preemption_timer_value =
> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> +
> /*
> * In some cases (usually, nested EPT), L2 is allowed to change its
> * own CR3 without exiting. If it has changed it, we must keep it.
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-13 17:15 ` Paolo Bonzini
@ 2013-09-14 7:44 ` Jan Kiszka
2013-09-15 7:59 ` Arthur Chunqi Li
2013-09-16 5:42 ` Arthur Chunqi Li
2013-09-16 7:44 ` Gleb Natapov
2 siblings, 1 reply; 15+ messages in thread
From: Jan Kiszka @ 2013-09-14 7:44 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Arthur Chunqi Li, kvm, gleb, yang.z.zhang
[-- Attachment #1: Type: text/plain, Size: 601 bytes --]
On 2013-09-13 19:15, Paolo Bonzini wrote:
> Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto:
>> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
>> + if (preempt_val_l2 <= preempt_val_l1)
>> + preempt_val_l2 = 0;
>> + else
>> + preempt_val_l2 -= preempt_val_l1;
>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
>
> Did you test that a value of 0 triggers an immediate exit, rather than
> counting down by 2^32? Perhaps it's safer to limit the value to 1
> instead of 0.
In my experience, 0 triggers immediate exits when the preemption timer
is enabled.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 263 bytes --]
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-14 7:44 ` Jan Kiszka
@ 2013-09-15 7:59 ` Arthur Chunqi Li
0 siblings, 0 replies; 15+ messages in thread
From: Arthur Chunqi Li @ 2013-09-15 7:59 UTC (permalink / raw)
To: Jan Kiszka; +Cc: Paolo Bonzini, kvm, Gleb Natapov, Yang Zhang
On Sat, Sep 14, 2013 at 3:44 PM, Jan Kiszka <jan.kiszka@web.de> wrote:
> On 2013-09-13 19:15, Paolo Bonzini wrote:
>> Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto:
>>> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
>>> + if (preempt_val_l2 <= preempt_val_l1)
>>> + preempt_val_l2 = 0;
>>> + else
>>> + preempt_val_l2 -= preempt_val_l1;
>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
>>
>> Did you test that a value of 0 triggers an immediate exit, rather than
>> counting down by 2^32? Perhaps it's safer to limit the value to 1
>> instead of 0.
>
> In my experience, 0 triggers immediate exits when the preemption timer
> is enabled.
Yes, L2 VM will exit immediately when the value is 0 with my patch.
Arthur
>
> Jan
>
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-06 2:04 [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer Arthur Chunqi Li
2013-09-13 17:15 ` Paolo Bonzini
@ 2013-09-15 12:31 ` Gleb Natapov
2013-09-16 5:49 ` Arthur Chunqi Li
1 sibling, 1 reply; 15+ messages in thread
From: Gleb Natapov @ 2013-09-15 12:31 UTC (permalink / raw)
To: Arthur Chunqi Li; +Cc: kvm, jan.kiszka, pbonzini, yang.z.zhang
On Fri, Sep 06, 2013 at 10:04:51AM +0800, Arthur Chunqi Li wrote:
> This patch contains the following two changes:
> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0
> with some reasons not emulated by L1, preemption timer value should
> be save in such exits.
> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls
> to nVMX.
>
> With this patch, nested VMX preemption timer features are fully
> supported.
>
> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> ---
> ChangeLog to v3:
> Move nested_adjust_preemption_timer to the latest place just before vmenter.
> Some minor changes.
>
> arch/x86/include/uapi/asm/msr-index.h | 1 +
> arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++--
> 2 files changed, 48 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
> index bb04650..b93e09a 100644
> --- a/arch/x86/include/uapi/asm/msr-index.h
> +++ b/arch/x86/include/uapi/asm/msr-index.h
> @@ -536,6 +536,7 @@
>
> /* MSR_IA32_VMX_MISC bits */
> #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
> /* AMD-V MSRs */
>
> #define MSR_VM_CR 0xc0010114
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 1f1da43..f364d16 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -374,6 +374,8 @@ struct nested_vmx {
> */
> struct page *apic_access_page;
> u64 msr_ia32_feature_control;
> + /* Set if vmexit is L2->L1 */
> + bool nested_vmx_exit;
Do not see why it is needed, see bellow.
> };
>
> #define POSTED_INTR_ON 0
> @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> #ifdef CONFIG_X86_64
> VM_EXIT_HOST_ADDR_SPACE_SIZE |
> #endif
> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> + if (!(nested_vmx_pinbased_ctls_high &
> + PIN_BASED_VMX_PREEMPTION_TIMER) ||
> + !(nested_vmx_exit_ctls_high &
> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
> + nested_vmx_exit_ctls_high &=
> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
> + nested_vmx_pinbased_ctls_high &=
> + (~PIN_BASED_VMX_PREEMPTION_TIMER);
> + }
> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> VM_EXIT_LOAD_IA32_EFER);
>
> @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
> *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
> }
>
> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
> +{
> + u64 delta_tsc_l1;
> + u32 preempt_val_l1, preempt_val_l2, preempt_scale;
> +
> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
> + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
> + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu,
> + native_read_tsc()) - vcpu->arch.last_guest_tsc;
> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
> + if (preempt_val_l2 <= preempt_val_l1)
> + preempt_val_l2 = 0;
> + else
> + preempt_val_l2 -= preempt_val_l1;
> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
> +}
> +
> /*
> * The guest has exited. See if we can fix it or if we need userspace
> * assistance.
> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> vmx->nested.nested_run_pending = 0;
>
> if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
> + vmx->nested.nested_vmx_exit = true;
> nested_vmx_vmexit(vcpu);
> return 1;
> }
> + vmx->nested.nested_vmx_exit = false;
>
> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
> vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> debugctlmsr = get_debugctlmsr();
>
> vmx->__launched = vmx->loaded_vmcs->launched;
> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
How can is_guest_mode() and nested_vmx_exit both be true? The only place
nested_vmx_exit is set to true is just before the call to
nested_vmx_vmexit(). The first thing nested_vmx_vmexit() does is make
is_guest_mode() false. To enter guest mode again at least one other
vmexit from L1 to L0 is needed, at which point nested_vmx_exit will be
reset to false again.
If you want to avoid calling nested_adjust_preemption_timer() during
vmlaunch/vmresume emulation (and it looks like this is what you are
trying to achieve here) you can check nested_run_pending.
> + nested_adjust_preemption_timer(vcpu);
> asm(
> /* Store host registers */
> "push %%" _ASM_DX "; push %%" _ASM_BP ";"
> @@ -7518,6 +7552,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> u32 exec_control;
> + u32 exit_control;
>
> vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> @@ -7691,7 +7726,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
> * bits are further modified by vmx_set_efer() below.
> */
> - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> + exit_control = vmcs_config.vmexit_ctrl;
> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
> + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> + vmcs_write32(VM_EXIT_CONTROLS, exit_control);
>
> /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
> * emulated by vmx_set_efer(), below.
> @@ -8090,6 +8128,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> vmcs12->guest_pending_dbg_exceptions =
> vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>
> + if ((vmcs12->pin_based_vm_exec_control &
> + PIN_BASED_VMX_PREEMPTION_TIMER) &&
> + (vmcs12->vm_exit_controls &
> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
> + vmcs12->vmx_preemption_timer_value =
> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> +
> /*
> * In some cases (usually, nested EPT), L2 is allowed to change its
> * own CR3 without exiting. If it has changed it, we must keep it.
> --
> 1.7.9.5
--
Gleb.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-13 17:15 ` Paolo Bonzini
2013-09-14 7:44 ` Jan Kiszka
@ 2013-09-16 5:42 ` Arthur Chunqi Li
2013-09-16 7:18 ` Jan Kiszka
2013-09-16 7:44 ` Gleb Natapov
2 siblings, 1 reply; 15+ messages in thread
From: Arthur Chunqi Li @ 2013-09-16 5:42 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: kvm, Jan Kiszka, Gleb Natapov, Yang Zhang
On Sat, Sep 14, 2013 at 1:15 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto:
>> This patch contains the following two changes:
>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0
>> with some reasons not emulated by L1, preemption timer value should
>> be save in such exits.
>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls
>> to nVMX.
>>
>> With this patch, nested VMX preemption timer features are fully
>> supported.
>>
>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> ---
>> ChangeLog to v3:
>> Move nested_adjust_preemption_timer to the latest place just before vmenter.
>> Some minor changes.
>>
>> arch/x86/include/uapi/asm/msr-index.h | 1 +
>> arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++--
>> 2 files changed, 48 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
>> index bb04650..b93e09a 100644
>> --- a/arch/x86/include/uapi/asm/msr-index.h
>> +++ b/arch/x86/include/uapi/asm/msr-index.h
>> @@ -536,6 +536,7 @@
>>
>> /* MSR_IA32_VMX_MISC bits */
>> #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
>> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
>> /* AMD-V MSRs */
>>
>> #define MSR_VM_CR 0xc0010114
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 1f1da43..f364d16 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -374,6 +374,8 @@ struct nested_vmx {
>> */
>> struct page *apic_access_page;
>> u64 msr_ia32_feature_control;
>> + /* Set if vmexit is L2->L1 */
>> + bool nested_vmx_exit;
>> };
>>
>> #define POSTED_INTR_ON 0
>> @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>> #ifdef CONFIG_X86_64
>> VM_EXIT_HOST_ADDR_SPACE_SIZE |
>> #endif
>> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
>> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
>> + if (!(nested_vmx_pinbased_ctls_high &
>> + PIN_BASED_VMX_PREEMPTION_TIMER) ||
>> + !(nested_vmx_exit_ctls_high &
>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
>
> Align this under the other "!". Also, I prefer to have one long line
> for the whole "!(... & ...) ||" (and likewise below), but I don't know
> if Gleb agrees
>
>> + nested_vmx_exit_ctls_high &=
>> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
>
> Please remove parentheses around ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, and
> likewise elsewhere in the patch.
>
>> + nested_vmx_pinbased_ctls_high &=
>> + (~PIN_BASED_VMX_PREEMPTION_TIMER);
>> + }
>> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
>> VM_EXIT_LOAD_IA32_EFER);
>>
>> @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>> *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
>> }
>>
>> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
>> +{
>> + u64 delta_tsc_l1;
>> + u32 preempt_val_l1, preempt_val_l2, preempt_scale;
>
> Should this exit immediately if the preemption timer pin-based control
> is disabled?
Hi Paolo,
How can I get pin-based control here from "struct kvm_vcpu *vcpu"?
Arthur
>
>> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
>> + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
>> + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
>> + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu,
>> + native_read_tsc()) - vcpu->arch.last_guest_tsc;
>
> Please format this like:
>
> delta_tsc_l1 =
> kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc())
> - vcpu->arch.last_guest_tsc;
>
>> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
>> + if (preempt_val_l2 <= preempt_val_l1)
>> + preempt_val_l2 = 0;
>> + else
>> + preempt_val_l2 -= preempt_val_l1;
>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
>
> Did you test that a value of 0 triggers an immediate exit, rather than
> counting down by 2^32? Perhaps it's safer to limit the value to 1
> instead of 0.
>
>> +}
>> +
>> /*
>> * The guest has exited. See if we can fix it or if we need userspace
>> * assistance.
>> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>> vmx->nested.nested_run_pending = 0;
>>
>> if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
>> + vmx->nested.nested_vmx_exit = true;
>
> I think this assignment should be in nested_vmx_vmexit, since it is
> called from other places as well.
>
>> nested_vmx_vmexit(vcpu);
>> return 1;
>> }
>> + vmx->nested.nested_vmx_exit = false;
>>
>> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
>> vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
>> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>> debugctlmsr = get_debugctlmsr();
>>
>> vmx->__launched = vmx->loaded_vmcs->launched;
>> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
>> + nested_adjust_preemption_timer(vcpu);
>
> Please leave the assignment to __launched last, since it's already
> initializing the asm below.
>
> I don't like the is_guest_mode check here... Maybe it's
> micro-optimizing, but I wonder if we already do too many checks in
> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
>
> Alternatively, we could change nested_vmx_exit to an enum in struct
> vcpu_vmx (with values for L0->L1, L0->L2, L1->L2) that is initialized in
> vmx_handle_exit. Then we could check directly for L0->L2 and not adjust
> the preemption timer in other cases. In fact, I suspect this enum could
> replace HF_GUEST_MASK altogether. However, this would require some
> other, more complicated, changes to svm.c.
>
> Gleb, what do you think?
>
> Paolo
>
>> asm(
>> /* Store host registers */
>> "push %%" _ASM_DX "; push %%" _ASM_BP ";"
>> @@ -7518,6 +7552,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>> {
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> u32 exec_control;
>> + u32 exit_control;
>>
>> vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>> vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
>> @@ -7691,7 +7726,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>> * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
>> * bits are further modified by vmx_set_efer() below.
>> */
>> - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
>> + exit_control = vmcs_config.vmexit_ctrl;
>> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
>> + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
>> + vmcs_write32(VM_EXIT_CONTROLS, exit_control);
>>
>> /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
>> * emulated by vmx_set_efer(), below.
>> @@ -8090,6 +8128,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>> vmcs12->guest_pending_dbg_exceptions =
>> vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>>
>> + if ((vmcs12->pin_based_vm_exec_control &
>> + PIN_BASED_VMX_PREEMPTION_TIMER) &&
>> + (vmcs12->vm_exit_controls &
>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
>> + vmcs12->vmx_preemption_timer_value =
>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
>> +
>> /*
>> * In some cases (usually, nested EPT), L2 is allowed to change its
>> * own CR3 without exiting. If it has changed it, we must keep it.
>>
>
--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-15 12:31 ` Gleb Natapov
@ 2013-09-16 5:49 ` Arthur Chunqi Li
2013-09-16 6:20 ` Gleb Natapov
0 siblings, 1 reply; 15+ messages in thread
From: Arthur Chunqi Li @ 2013-09-16 5:49 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm, Jan Kiszka, Paolo Bonzini, Yang Zhang
On Sun, Sep 15, 2013 at 8:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
> On Fri, Sep 06, 2013 at 10:04:51AM +0800, Arthur Chunqi Li wrote:
>> This patch contains the following two changes:
>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0
>> with some reasons not emulated by L1, preemption timer value should
>> be save in such exits.
>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls
>> to nVMX.
>>
>> With this patch, nested VMX preemption timer features are fully
>> supported.
>>
>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>> ---
>> ChangeLog to v3:
>> Move nested_adjust_preemption_timer to the latest place just before vmenter.
>> Some minor changes.
>>
>> arch/x86/include/uapi/asm/msr-index.h | 1 +
>> arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++--
>> 2 files changed, 48 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
>> index bb04650..b93e09a 100644
>> --- a/arch/x86/include/uapi/asm/msr-index.h
>> +++ b/arch/x86/include/uapi/asm/msr-index.h
>> @@ -536,6 +536,7 @@
>>
>> /* MSR_IA32_VMX_MISC bits */
>> #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
>> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
>> /* AMD-V MSRs */
>>
>> #define MSR_VM_CR 0xc0010114
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 1f1da43..f364d16 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -374,6 +374,8 @@ struct nested_vmx {
>> */
>> struct page *apic_access_page;
>> u64 msr_ia32_feature_control;
>> + /* Set if vmexit is L2->L1 */
>> + bool nested_vmx_exit;
> Do not see why it is needed, see bellow.
>
>> };
>>
>> #define POSTED_INTR_ON 0
>> @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>> #ifdef CONFIG_X86_64
>> VM_EXIT_HOST_ADDR_SPACE_SIZE |
>> #endif
>> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
>> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
>> + if (!(nested_vmx_pinbased_ctls_high &
>> + PIN_BASED_VMX_PREEMPTION_TIMER) ||
>> + !(nested_vmx_exit_ctls_high &
>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
>> + nested_vmx_exit_ctls_high &=
>> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
>> + nested_vmx_pinbased_ctls_high &=
>> + (~PIN_BASED_VMX_PREEMPTION_TIMER);
>> + }
>> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
>> VM_EXIT_LOAD_IA32_EFER);
>>
>> @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>> *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
>> }
>>
>> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
>> +{
>> + u64 delta_tsc_l1;
>> + u32 preempt_val_l1, preempt_val_l2, preempt_scale;
>> +
>> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
>> + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
>> + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
>> + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu,
>> + native_read_tsc()) - vcpu->arch.last_guest_tsc;
>> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
>> + if (preempt_val_l2 <= preempt_val_l1)
>> + preempt_val_l2 = 0;
>> + else
>> + preempt_val_l2 -= preempt_val_l1;
>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
>> +}
>> +
>> /*
>> * The guest has exited. See if we can fix it or if we need userspace
>> * assistance.
>> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>> vmx->nested.nested_run_pending = 0;
>>
>> if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
>> + vmx->nested.nested_vmx_exit = true;
>> nested_vmx_vmexit(vcpu);
>> return 1;
>> }
>> + vmx->nested.nested_vmx_exit = false;
>>
>> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
>> vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
>> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>> debugctlmsr = get_debugctlmsr();
>>
>> vmx->__launched = vmx->loaded_vmcs->launched;
>> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
> How is_guest_mode() and nested_vmx_exi can be both true? The only place
> nested_vmx_exit is set to true is just before call to
> nested_vmx_vmexit(). The firs thing nested_vmx_vmexit() does is makes
> is_guest_mode() false. To enter guest mode again at least one other
> vmexit from L1 to L0 is needed at which point nested_vmx_exit will be
> reset to false again.
>
> If you want to avoid calling nested_adjust_preemption_timer() during
> vmlaunch/vmresume emulation (and it looks like this is what you are
> trying to achieve here) you can check nested_run_pending.
Besides vmlaunch/vmresume emulation, every exit from L2->L1 should not
call nested_adjust_preemption_timer(); this function is only used to
adjust the preemption timer on L2->L0->L2 round trips. Can
nested_run_pending distinguish this case?
Arthur
>
>
>> + nested_adjust_preemption_timer(vcpu);
>> asm(
>> /* Store host registers */
>> "push %%" _ASM_DX "; push %%" _ASM_BP ";"
>> @@ -7518,6 +7552,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>> {
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> u32 exec_control;
>> + u32 exit_control;
>>
>> vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>> vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
>> @@ -7691,7 +7726,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>> * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
>> * bits are further modified by vmx_set_efer() below.
>> */
>> - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
>> + exit_control = vmcs_config.vmexit_ctrl;
>> + if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
>> + exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
>> + vmcs_write32(VM_EXIT_CONTROLS, exit_control);
>>
>> /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
>> * emulated by vmx_set_efer(), below.
>> @@ -8090,6 +8128,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>> vmcs12->guest_pending_dbg_exceptions =
>> vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>>
>> + if ((vmcs12->pin_based_vm_exec_control &
>> + PIN_BASED_VMX_PREEMPTION_TIMER) &&
>> + (vmcs12->vm_exit_controls &
>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
>> + vmcs12->vmx_preemption_timer_value =
>> + vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
>> +
>> /*
>> * In some cases (usually, nested EPT), L2 is allowed to change its
>> * own CR3 without exiting. If it has changed it, we must keep it.
>> --
>> 1.7.9.5
>
> --
> Gleb.
--
Arthur Chunqi Li
Department of Computer Science
School of EECS
Peking University
Beijing, China
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-16 5:49 ` Arthur Chunqi Li
@ 2013-09-16 6:20 ` Gleb Natapov
0 siblings, 0 replies; 15+ messages in thread
From: Gleb Natapov @ 2013-09-16 6:20 UTC (permalink / raw)
To: Arthur Chunqi Li; +Cc: kvm, Jan Kiszka, Paolo Bonzini, Yang Zhang
On Mon, Sep 16, 2013 at 01:49:41PM +0800, Arthur Chunqi Li wrote:
> On Sun, Sep 15, 2013 at 8:31 PM, Gleb Natapov <gleb@redhat.com> wrote:
> > On Fri, Sep 06, 2013 at 10:04:51AM +0800, Arthur Chunqi Li wrote:
> >> This patch contains the following two changes:
> >> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0
> >> with some reasons not emulated by L1, preemption timer value should
> >> be save in such exits.
> >> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls
> >> to nVMX.
> >>
> >> With this patch, nested VMX preemption timer features are fully
> >> supported.
> >>
> >> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> >> ---
> >> ChangeLog to v3:
> >> Move nested_adjust_preemption_timer to the latest place just before vmenter.
> >> Some minor changes.
> >>
> >> arch/x86/include/uapi/asm/msr-index.h | 1 +
> >> arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++--
> >> 2 files changed, 48 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
> >> index bb04650..b93e09a 100644
> >> --- a/arch/x86/include/uapi/asm/msr-index.h
> >> +++ b/arch/x86/include/uapi/asm/msr-index.h
> >> @@ -536,6 +536,7 @@
> >>
> >> /* MSR_IA32_VMX_MISC bits */
> >> #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
> >> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
> >> /* AMD-V MSRs */
> >>
> >> #define MSR_VM_CR 0xc0010114
> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >> index 1f1da43..f364d16 100644
> >> --- a/arch/x86/kvm/vmx.c
> >> +++ b/arch/x86/kvm/vmx.c
> >> @@ -374,6 +374,8 @@ struct nested_vmx {
> >> */
> >> struct page *apic_access_page;
> >> u64 msr_ia32_feature_control;
> >> + /* Set if vmexit is L2->L1 */
> >> + bool nested_vmx_exit;
> > Do not see why it is needed, see bellow.
> >
> >> };
> >>
> >> #define POSTED_INTR_ON 0
> >> @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> >> #ifdef CONFIG_X86_64
> >> VM_EXIT_HOST_ADDR_SPACE_SIZE |
> >> #endif
> >> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
> >> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
> >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> >> + if (!(nested_vmx_pinbased_ctls_high &
> >> + PIN_BASED_VMX_PREEMPTION_TIMER) ||
> >> + !(nested_vmx_exit_ctls_high &
> >> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
> >> + nested_vmx_exit_ctls_high &=
> >> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
> >> + nested_vmx_pinbased_ctls_high &=
> >> + (~PIN_BASED_VMX_PREEMPTION_TIMER);
> >> + }
> >> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> >> VM_EXIT_LOAD_IA32_EFER);
> >>
> >> @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
> >> *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
> >> }
> >>
> >> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
> >> +{
> >> + u64 delta_tsc_l1;
> >> + u32 preempt_val_l1, preempt_val_l2, preempt_scale;
> >> +
> >> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
> >> + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
> >> + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> >> + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu,
> >> + native_read_tsc()) - vcpu->arch.last_guest_tsc;
> >> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
> >> + if (preempt_val_l2 <= preempt_val_l1)
> >> + preempt_val_l2 = 0;
> >> + else
> >> + preempt_val_l2 -= preempt_val_l1;
> >> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
> >> +}
> >> +
> >> /*
> >> * The guest has exited. See if we can fix it or if we need userspace
> >> * assistance.
> >> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> >> vmx->nested.nested_run_pending = 0;
> >>
> >> if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
> >> + vmx->nested.nested_vmx_exit = true;
> >> nested_vmx_vmexit(vcpu);
> >> return 1;
> >> }
> >> + vmx->nested.nested_vmx_exit = false;
> >>
> >> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
> >> vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
> >> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> >> debugctlmsr = get_debugctlmsr();
> >>
> >> vmx->__launched = vmx->loaded_vmcs->launched;
> >> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
> > How is_guest_mode() and nested_vmx_exi can be both true? The only place
> > nested_vmx_exit is set to true is just before call to
> > nested_vmx_vmexit(). The firs thing nested_vmx_vmexit() does is makes
> > is_guest_mode() false. To enter guest mode again at least one other
> > vmexit from L1 to L0 is needed at which point nested_vmx_exit will be
> > reset to false again.
> >
> > If you want to avoid calling nested_adjust_preemption_timer() during
> > vmlaunch/vmresume emulation (and it looks like this is what you are
> > trying to achieve here) you can check nested_run_pending.
> Besides vmlaunch/vmresume emulation, every exit from L2->L1 should not
> call nested_adjust_preemption_timer(), this function is just used to
> adjust preemption timer when L2->L0->L2. Can nested_run_pending
> distinguish this?
>
On L2->L1 exit is_guest_mode() will be false during vmx_vcpu_run().
On L1->L2 entry nested_run_pending will be true. On L0->L2 entry
is_guest_mode() == true && nested_run_pending == false.
--
Gleb.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-16 5:42 ` Arthur Chunqi Li
@ 2013-09-16 7:18 ` Jan Kiszka
0 siblings, 0 replies; 15+ messages in thread
From: Jan Kiszka @ 2013-09-16 7:18 UTC (permalink / raw)
To: Arthur Chunqi Li; +Cc: Paolo Bonzini, kvm, Gleb Natapov, Yang Zhang
[-- Attachment #1: Type: text/plain, Size: 3814 bytes --]
On 2013-09-16 07:42, Arthur Chunqi Li wrote:
> On Sat, Sep 14, 2013 at 1:15 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto:
>>> This patch contains the following two changes:
>>> 1. Fix the bug in nested preemption timer support. If vmexit L2->L0
>>> with some reasons not emulated by L1, preemption timer value should
>>> be save in such exits.
>>> 2. Add support of "Save VMX-preemption timer value" VM-Exit controls
>>> to nVMX.
>>>
>>> With this patch, nested VMX preemption timer features are fully
>>> supported.
>>>
>>> Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
>>> ---
>>> ChangeLog to v3:
>>> Move nested_adjust_preemption_timer to the latest place just before vmenter.
>>> Some minor changes.
>>>
>>> arch/x86/include/uapi/asm/msr-index.h | 1 +
>>> arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++--
>>> 2 files changed, 48 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
>>> index bb04650..b93e09a 100644
>>> --- a/arch/x86/include/uapi/asm/msr-index.h
>>> +++ b/arch/x86/include/uapi/asm/msr-index.h
>>> @@ -536,6 +536,7 @@
>>>
>>> /* MSR_IA32_VMX_MISC bits */
>>> #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
>>> +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
>>> /* AMD-V MSRs */
>>>
>>> #define MSR_VM_CR 0xc0010114
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index 1f1da43..f364d16 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -374,6 +374,8 @@ struct nested_vmx {
>>> */
>>> struct page *apic_access_page;
>>> u64 msr_ia32_feature_control;
>>> + /* Set if vmexit is L2->L1 */
>>> + bool nested_vmx_exit;
>>> };
>>>
>>> #define POSTED_INTR_ON 0
>>> @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>>> #ifdef CONFIG_X86_64
>>> VM_EXIT_HOST_ADDR_SPACE_SIZE |
>>> #endif
>>> - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
>>> + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
>>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
>>> + if (!(nested_vmx_pinbased_ctls_high &
>>> + PIN_BASED_VMX_PREEMPTION_TIMER) ||
>>> + !(nested_vmx_exit_ctls_high &
>>> + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
>>
>> Align this under the other "!". Also, I prefer to have one long line
>> for the whole "!(... & ...) ||" (and likewise below), but I don't know
>> if Gleb agrees
>>
>>> + nested_vmx_exit_ctls_high &=
>>> + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
>>
>> Please remove parentheses around ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, and
>> likewise elsewhere in the patch.
>>
>>> + nested_vmx_pinbased_ctls_high &=
>>> + (~PIN_BASED_VMX_PREEMPTION_TIMER);
>>> + }
>>> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
>>> VM_EXIT_LOAD_IA32_EFER);
>>>
>>> @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
>>> *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
>>> }
>>>
>>> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
>>> +{
>>> + u64 delta_tsc_l1;
>>> + u32 preempt_val_l1, preempt_val_l2, preempt_scale;
>>
>> Should this exit immediately if the preemption timer pin-based control
>> is disabled?
> Hi Paolo,
> How can I get pin-based control here from "struct kvm_vcpu *vcpu"?
You can find get_vmcs12(vcpu)->pin_based_vm_exec_control in existing code.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 263 bytes --]
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-13 17:15 ` Paolo Bonzini
2013-09-14 7:44 ` Jan Kiszka
2013-09-16 5:42 ` Arthur Chunqi Li
@ 2013-09-16 7:44 ` Gleb Natapov
2013-09-16 8:58 ` Paolo Bonzini
2 siblings, 1 reply; 15+ messages in thread
From: Gleb Natapov @ 2013-09-16 7:44 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Arthur Chunqi Li, kvm, jan.kiszka, yang.z.zhang
On Fri, Sep 13, 2013 at 07:15:11PM +0200, Paolo Bonzini wrote:
> Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto:
> > This patch contains the following two changes:
> > 1. Fix the bug in nested preemption timer support. If vmexit L2->L0
> > with some reasons not emulated by L1, preemption timer value should
> > be save in such exits.
> > 2. Add support of "Save VMX-preemption timer value" VM-Exit controls
> > to nVMX.
> >
> > With this patch, nested VMX preemption timer features are fully
> > supported.
> >
> > Signed-off-by: Arthur Chunqi Li <yzt356@gmail.com>
> > ---
> > ChangeLog to v3:
> > Move nested_adjust_preemption_timer to the latest place just before vmenter.
> > Some minor changes.
> >
> > arch/x86/include/uapi/asm/msr-index.h | 1 +
> > arch/x86/kvm/vmx.c | 49 +++++++++++++++++++++++++++++++--
> > 2 files changed, 48 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
> > index bb04650..b93e09a 100644
> > --- a/arch/x86/include/uapi/asm/msr-index.h
> > +++ b/arch/x86/include/uapi/asm/msr-index.h
> > @@ -536,6 +536,7 @@
> >
> > /* MSR_IA32_VMX_MISC bits */
> > #define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
> > +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
> > /* AMD-V MSRs */
> >
> > #define MSR_VM_CR 0xc0010114
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 1f1da43..f364d16 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -374,6 +374,8 @@ struct nested_vmx {
> > */
> > struct page *apic_access_page;
> > u64 msr_ia32_feature_control;
> > + /* Set if vmexit is L2->L1 */
> > + bool nested_vmx_exit;
> > };
> >
> > #define POSTED_INTR_ON 0
> > @@ -2204,7 +2206,17 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> > #ifdef CONFIG_X86_64
> > VM_EXIT_HOST_ADDR_SPACE_SIZE |
> > #endif
> > - VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
> > + VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
> > + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
> > + if (!(nested_vmx_pinbased_ctls_high &
> > + PIN_BASED_VMX_PREEMPTION_TIMER) ||
> > + !(nested_vmx_exit_ctls_high &
> > + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) {
>
> Align this under the other "!". Also, I prefer to have one long line
> for the whole "!(... & ...) ||" (and likewise below), but I don't know
> if Gleb agrees
>
!(... & ...) ||
!(... & ...)
fits perfectly to 80 chars.
> > + nested_vmx_exit_ctls_high &=
> > + (~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
>
> Please remove parentheses around ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, and
> likewise elsewhere in the patch.
>
and line break too.
> > + nested_vmx_pinbased_ctls_high &=
> > + (~PIN_BASED_VMX_PREEMPTION_TIMER);
> > + }
> > nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> > VM_EXIT_LOAD_IA32_EFER);
> >
> > @@ -6707,6 +6719,24 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
> > *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
> > }
> >
> > +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
> > +{
> > + u64 delta_tsc_l1;
> > + u32 preempt_val_l1, preempt_val_l2, preempt_scale;
>
> Should this exit immediately if the preemption timer pin-based control
> is disabled?
>
> > + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
> > + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
> > + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
> > + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu,
> > + native_read_tsc()) - vcpu->arch.last_guest_tsc;
>
> Please format this like:
>
> delta_tsc_l1 =
> kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc())
> - vcpu->arch.last_guest_tsc;
>
And call vmx_read_l1_tsc() directly. Actually you can even use
to_vmx(vcpu)->nested.vmcs01_tsc_offset directly here, since the function
will be called only when is_guest_mode() == true, but vmx_read_l1_tsc()
may be clearer here and the compiler should optimize the second
is_guest_mode() check away anyway.
> > + preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
> > + if (preempt_val_l2 <= preempt_val_l1)
> > + preempt_val_l2 = 0;
> > + else
> > + preempt_val_l2 -= preempt_val_l1;
> > + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
>
> Did you test that a value of 0 triggers an immediate exit, rather than
> counting down by 2^32? Perhaps it's safer to limit the value to 1
> instead of 0.
>
> > +}
> > +
> > /*
> > * The guest has exited. See if we can fix it or if we need userspace
> > * assistance.
> > @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
> > vmx->nested.nested_run_pending = 0;
> >
> > if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
> > + vmx->nested.nested_vmx_exit = true;
>
> I think this assignment should be in nested_vmx_vmexit, since it is
> called from other places as well.
>
> > nested_vmx_vmexit(vcpu);
> > return 1;
> > }
> > + vmx->nested.nested_vmx_exit = false;
> >
> > if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
> > vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
> > @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> > debugctlmsr = get_debugctlmsr();
> >
> > vmx->__launched = vmx->loaded_vmcs->launched;
> > + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
> > + nested_adjust_preemption_timer(vcpu);
>
> Please leave the assignment to __launched last, since it's already
> initializing the asm below.
>
> I don't like the is_guest_mode check here... Maybe it's
> micro-optimizing, but I wonder if we already do too many checks in
> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
>
Why will this be more efficient than the HF_GUEST_MASK check?
> Alternatively, we could change nested_vmx_exit to an enum in struct
> vcpu_vmx (with values for L0->L1, L0->L2, L1->L2) that is initialized in
> vmx_handle_exit. Then we could check directly for L0->L2 and not adjust
> the preemption timer in other cases. In fact, I suspect this enum could
> replace HF_GUEST_MASK altogether. However, this would require some
> other, more complicated, changes to svm.c.
>
> Gleb, what do you think?
>
I do not see why nested_vmx_exit is necessary at all yet. We can detect
all the aforementioned cases without it.
--
Gleb.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-16 7:44 ` Gleb Natapov
@ 2013-09-16 8:58 ` Paolo Bonzini
2013-09-16 9:09 ` Gleb Natapov
0 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2013-09-16 8:58 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Arthur Chunqi Li, kvm, jan.kiszka, yang.z.zhang
Il 16/09/2013 09:44, Gleb Natapov ha scritto:
> On Fri, Sep 13, 2013 at 07:15:11PM +0200, Paolo Bonzini wrote:
>> Il 06/09/2013 04:04, Arthur Chunqi Li ha scritto:
>>> +static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu)
>>> +{
>>> + u64 delta_tsc_l1;
>>> + u32 preempt_val_l1, preempt_val_l2, preempt_scale;
>>
>> Should this exit immediately if the preemption timer pin-based control
>> is disabled?
>>
>>> + preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) &
>>> + MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE;
>>> + preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE);
>>> + delta_tsc_l1 = kvm_x86_ops->read_l1_tsc(vcpu,
>>> + native_read_tsc()) - vcpu->arch.last_guest_tsc;
>>
>> Please format this like:
>>
>> delta_tsc_l1 =
>> kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc())
>> - vcpu->arch.last_guest_tsc;
>>
> And call vmx_read_l1_tsc() directly. Actually you can even use
> to_vmx(vcpu)->nested.vmcs01_tsc_offset directly here since the function
> will be called only when is_guest_mode() == true, but vmx_read_l1_tsc()
> may be more clear here and compile should optimize second is_guest_mode()
> check anyway.
Right. Though I'm not that concerned about optimizing L1->L2 and L0->L2
entries; I'm more concerned about not slowing down L0->L1 (which
includes the non-nested case, of course).
>>> + preempt_val_l1 = delta_tsc_l1 >> preempt_scale;
>>> + if (preempt_val_l2 <= preempt_val_l1)
>>> + preempt_val_l2 = 0;
>>> + else
>>> + preempt_val_l2 -= preempt_val_l1;
>>> + vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2);
>>
>> Did you test that a value of 0 triggers an immediate exit, rather than
>> counting down by 2^32? Perhaps it's safer to limit the value to 1
>> instead of 0.
>>
>>> +}
>>> +
>>> /*
>>> * The guest has exited. See if we can fix it or if we need userspace
>>> * assistance.
>>> @@ -6736,9 +6766,11 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>>> vmx->nested.nested_run_pending = 0;
>>>
>>> if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
>>> + vmx->nested.nested_vmx_exit = true;
>>
>> I think this assignment should be in nested_vmx_vmexit, since it is
>> called from other places as well.
>>
>>> nested_vmx_vmexit(vcpu);
>>> return 1;
>>> }
>>> + vmx->nested.nested_vmx_exit = false;
>>>
>>> if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
>>> vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
>>> @@ -7132,6 +7164,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>> debugctlmsr = get_debugctlmsr();
>>>
>>> vmx->__launched = vmx->loaded_vmcs->launched;
>>> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
>>> + nested_adjust_preemption_timer(vcpu);
>>
>> Please leave the assignment to __launched last, since it's already
>> initializing the asm below.
>>
>> I don't like the is_guest_mode check here... Maybe it's
>> micro-optimizing, but I wonder if we already do too many checks in
>> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
>> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
>>
> Why this will be more efficient that HF_GUEST_MASK check?
Because we have already loaded vmx->loaded_vmcs, so it's one memory
access less.
>> Alternatively, we could change nested_vmx_exit to an enum in struct
>> vcpu_vmx (with values for L0->L1, L0->L2, L1->L2) that is initialized in
>> vmx_handle_exit. Then we could check directly for L0->L2 and not adjust
>> the preemption timer in other cases. In fact, I suspect this enum could
>> replace HF_GUEST_MASK altogether. However, this would require some
>> other, more complicated, changes to svm.c.
>>
>> Gleb, what do you think?
>>
> I do not see why nested_vmx_exit is necessary at all yet. We can detect
> all aforementioned cases without.
How do you distinguish L0->L2 from L1->L2 in vmx_vcpu_run?
Paolo
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-16 8:58 ` Paolo Bonzini
@ 2013-09-16 9:09 ` Gleb Natapov
2013-09-16 9:49 ` Paolo Bonzini
0 siblings, 1 reply; 15+ messages in thread
From: Gleb Natapov @ 2013-09-16 9:09 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Arthur Chunqi Li, kvm, jan.kiszka, yang.z.zhang
On Mon, Sep 16, 2013 at 10:58:12AM +0200, Paolo Bonzini wrote:
> >>> vmx->__launched = vmx->loaded_vmcs->launched;
> >>> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
> >>> + nested_adjust_preemption_timer(vcpu);
> >>
> >> Please leave the assignment to __launched last, since it's already
> >> initializing the asm below.
> >>
> >> I don't like the is_guest_mode check here... Maybe it's
> >> micro-optimizing, but I wonder if we already do too many checks in
> >> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
> >> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
> >>
> > Why this will be more efficient that HF_GUEST_MASK check?
>
> Because we have already loaded vmx->loaded_vmcs, so it's one memory
> access less.
>
But we will have to load vmx->vmcs1 instead :) It looks like a very
minor optimization, if one at all. If we could somehow avoid the
additional if() entirely — maybe by mimicking vcpu->requests in vmx — we
could get rid of most of the ifs there.
--
Gleb.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-16 9:09 ` Gleb Natapov
@ 2013-09-16 9:49 ` Paolo Bonzini
2013-09-16 10:44 ` Gleb Natapov
0 siblings, 1 reply; 15+ messages in thread
From: Paolo Bonzini @ 2013-09-16 9:49 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Arthur Chunqi Li, kvm, jan.kiszka, yang.z.zhang
Il 16/09/2013 11:09, Gleb Natapov ha scritto:
> On Mon, Sep 16, 2013 at 10:58:12AM +0200, Paolo Bonzini wrote:
>>>>> vmx->__launched = vmx->loaded_vmcs->launched;
>>>>> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
>>>>> + nested_adjust_preemption_timer(vcpu);
>>>>
>>>> Please leave the assignment to __launched last, since it's already
>>>> initializing the asm below.
>>>>
>>>> I don't like the is_guest_mode check here... Maybe it's
>>>> micro-optimizing, but I wonder if we already do too many checks in
>>>> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
>>>> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
>>>>
>>> Why this will be more efficient that HF_GUEST_MASK check?
>>
>> Because we have already loaded vmx->loaded_vmcs, so it's one memory
>> access less.
>>
> But we will have to load vmx->vmcs1 instead :)
That's not a memory load, it's an add.
> Looks like very minor
> optimization if at all. If we could avoid additional if() at all
> somehow, may be mimicking vcpu->requests in vmx to get rid of most ifs
> there.
Yes.
Paolo
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-16 9:49 ` Paolo Bonzini
@ 2013-09-16 10:44 ` Gleb Natapov
2013-09-16 10:50 ` Paolo Bonzini
0 siblings, 1 reply; 15+ messages in thread
From: Gleb Natapov @ 2013-09-16 10:44 UTC (permalink / raw)
To: Paolo Bonzini; +Cc: Arthur Chunqi Li, kvm, jan.kiszka, yang.z.zhang
On Mon, Sep 16, 2013 at 11:49:40AM +0200, Paolo Bonzini wrote:
> Il 16/09/2013 11:09, Gleb Natapov ha scritto:
> > On Mon, Sep 16, 2013 at 10:58:12AM +0200, Paolo Bonzini wrote:
> >>>>> vmx->__launched = vmx->loaded_vmcs->launched;
> >>>>> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
> >>>>> + nested_adjust_preemption_timer(vcpu);
> >>>>
> >>>> Please leave the assignment to __launched last, since it's already
> >>>> initializing the asm below.
> >>>>
> >>>> I don't like the is_guest_mode check here... Maybe it's
> >>>> micro-optimizing, but I wonder if we already do too many checks in
> >>>> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
> >>>> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
> >>>>
> >>> Why this will be more efficient that HF_GUEST_MASK check?
> >>
> >> Because we have already loaded vmx->loaded_vmcs, so it's one memory
> >> access less.
> >>
> > But we will have to load vmx->vmcs1 instead :)
>
> That's not a memory load, it's an add.
>
You assume that vmx->loaded_vmcs and vmx will both be in registers here;
isn't that too much to assume?
--
Gleb.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer
2013-09-16 10:44 ` Gleb Natapov
@ 2013-09-16 10:50 ` Paolo Bonzini
0 siblings, 0 replies; 15+ messages in thread
From: Paolo Bonzini @ 2013-09-16 10:50 UTC (permalink / raw)
To: Gleb Natapov; +Cc: Arthur Chunqi Li, kvm, jan.kiszka, yang.z.zhang
Il 16/09/2013 12:44, Gleb Natapov ha scritto:
> On Mon, Sep 16, 2013 at 11:49:40AM +0200, Paolo Bonzini wrote:
>> Il 16/09/2013 11:09, Gleb Natapov ha scritto:
>>> On Mon, Sep 16, 2013 at 10:58:12AM +0200, Paolo Bonzini wrote:
>>>>>>> vmx->__launched = vmx->loaded_vmcs->launched;
>>>>>>> + if (is_guest_mode(vcpu) && !(vmx->nested.nested_vmx_exit))
>>>>>>> + nested_adjust_preemption_timer(vcpu);
>>>>>>
>>>>>> Please leave the assignment to __launched last, since it's already
>>>>>> initializing the asm below.
>>>>>>
>>>>>> I don't like the is_guest_mode check here... Maybe it's
>>>>>> micro-optimizing, but I wonder if we already do too many checks in
>>>>>> vmx_vcpu_run... For example, is_guest_mode could be changed (I think)
>>>>>> to a check for "vmx->loaded_vmcs == &vmx->vmcs1".
>>>>>>
>>>>> Why will this be more efficient than the HF_GUEST_MASK check?
>>>>
>>>> Because we have already loaded vmx->loaded_vmcs, so it's one memory
>>>> access less.
>>>>
>>> But we will have to load vmx->vmcs1 instead :)
>>
>> That's not a memory load, it's an add.
>>
> You assume that vmx->loaded_vmcs and vmx will both be in registers here;
> isn't that too much to assume?
Both of them are needed in this very close line:
vmx->__launched = vmx->loaded_vmcs->launched;
So it is not a big assumption---even on 32-bits, perhaps. By contrast,
vcpu->hflags is very unlikely to be in a register.
This is of course mostly pointless alone. But if we could shave 100
cycles off a lightweight vmexit, that could give a measurable
improvement on nested virt.
Paolo
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2013-09-16 10:50 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-06 2:04 [PATCH v4] KVM: nVMX: Fully support of nested VMX preemption timer Arthur Chunqi Li
2013-09-13 17:15 ` Paolo Bonzini
2013-09-14 7:44 ` Jan Kiszka
2013-09-15 7:59 ` Arthur Chunqi Li
2013-09-16 5:42 ` Arthur Chunqi Li
2013-09-16 7:18 ` Jan Kiszka
2013-09-16 7:44 ` Gleb Natapov
2013-09-16 8:58 ` Paolo Bonzini
2013-09-16 9:09 ` Gleb Natapov
2013-09-16 9:49 ` Paolo Bonzini
2013-09-16 10:44 ` Gleb Natapov
2013-09-16 10:50 ` Paolo Bonzini
2013-09-15 12:31 ` Gleb Natapov
2013-09-16 5:49 ` Arthur Chunqi Li
2013-09-16 6:20 ` Gleb Natapov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).