From: Vitaly Kuznetsov <vkuznets@redhat.com>
To: Makarand Sonare <makarandsonare@google.com>
Cc: kvm@vger.kernel.org, pshier@google.com, jmattson@google.com
Subject: Re: [PATCH 1/2] KVM: nVMX: Fix VMX preemption timer migration
Date: Tue, 19 May 2020 02:49:33 +0200 [thread overview]
Message-ID: <875zcsvjdu.fsf@vitty.brq.redhat.com> (raw)
In-Reply-To: <20200518201600.255669-2-makarandsonare@google.com>
Makarand Sonare <makarandsonare@google.com> writes:
> From: Peter Shier <pshier@google.com>
>
> Add new field to hold preemption timer expiration deadline
> appended to struct kvm_vmx_nested_state_data. This is to prevent
> the first VM-Enter after migration from incorrectly restarting the timer
> with the full timer value instead of partially decayed timer value.
> KVM_SET_NESTED_STATE restarts timer using migrated state regardless
> of whether L1 sets VM_EXIT_SAVE_VMX_PREEMPTION_TIMER.
>
> Fixes: cf8b84f48a593 ("kvm: nVMX: Prepare for checkpointing L2 state")
>
> Signed-off-by: Peter Shier <pshier@google.com>
> Signed-off-by: Makarand Sonare <makarandsonare@google.com>
> Change-Id: I6446aba5a547afa667f0ef4620b1b76115bf3753
> ---
> Documentation/virt/kvm/api.rst | 4 ++
> arch/x86/include/uapi/asm/kvm.h | 2 +
> arch/x86/kvm/vmx/nested.c | 61 ++++++++++++++++++++++++---
> arch/x86/kvm/vmx/vmx.h | 1 +
> arch/x86/kvm/x86.c | 3 +-
> include/uapi/linux/kvm.h | 1 +
> tools/arch/x86/include/uapi/asm/kvm.h | 2 +
> 7 files changed, 67 insertions(+), 7 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index d871dacb984e9..b410815772970 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4326,6 +4326,9 @@ Errors:
> #define KVM_STATE_NESTED_RUN_PENDING 0x00000002
> #define KVM_STATE_NESTED_EVMCS 0x00000004
>
Not your fault but KVM_STATE_NESTED_MTF_PENDING seems to be missing here
> + /* Available with KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */
> + #define KVM_STATE_NESTED_PREEMPTION_TIMER 0x00000010
> +
> #define KVM_STATE_NESTED_FORMAT_VMX 0
> #define KVM_STATE_NESTED_FORMAT_SVM 1
>
> @@ -4346,6 +4349,7 @@ Errors:
> struct kvm_vmx_nested_state_data {
> __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> + __u64 preemption_timer_deadline;
> };
>
> This ioctl copies the vcpu's nested virtualization state from the kernel to
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index 3f3f780c8c650..20d5832bab215 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -391,6 +391,7 @@ struct kvm_sync_regs {
> #define KVM_STATE_NESTED_RUN_PENDING 0x00000002
> #define KVM_STATE_NESTED_EVMCS 0x00000004
> #define KVM_STATE_NESTED_MTF_PENDING 0x00000008
> +#define KVM_STATE_NESTED_PREEMPTION_TIMER 0x00000010
and here you don't have the "/* Available with
KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */" comment you put to api.rst. I
think it would be better to keep them in sync.
>
> #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
> #define KVM_STATE_NESTED_SMM_VMXON 0x00000002
> @@ -400,6 +401,7 @@ struct kvm_sync_regs {
> struct kvm_vmx_nested_state_data {
> __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
> + __u64 preemption_timer_deadline;
> };
>
> struct kvm_vmx_nested_state_hdr {
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 51ebb60e1533a..badb82a39ac04 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2092,9 +2092,9 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
> return HRTIMER_NORESTART;
> }
>
> -static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
> +static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
> + u64 preemption_timeout)
> {
> - u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
> struct vcpu_vmx *vmx = to_vmx(vcpu);
>
> /*
> @@ -3353,8 +3353,24 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
> * the timer.
> */
> vmx->nested.preemption_timer_expired = false;
> - if (nested_cpu_has_preemption_timer(vmcs12))
> - vmx_start_preemption_timer(vcpu);
> + if (nested_cpu_has_preemption_timer(vmcs12)) {
> + u64 timer_value;
> + u64 l1_tsc_value = kvm_read_l1_tsc(vcpu, rdtsc());
> +
> + if (from_vmentry) {
> + timer_value = vmcs12->vmx_preemption_timer_value;
> + vmx->nested.preemption_timer_deadline = timer_value +
> + (l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE);
> + } else {
> + if ((l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE) >
> + vmx->nested.preemption_timer_deadline)
> + timer_value = 0;
> + else
> + timer_value = vmx->nested.preemption_timer_deadline -
> + (l1_tsc_value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE);
> + }
> + vmx_start_preemption_timer(vcpu, timer_value);
> + }
>
> /*
> * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
> @@ -3962,9 +3978,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
>
> if (nested_cpu_has_preemption_timer(vmcs12) &&
> - vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
> + !vmx->nested.nested_run_pending) {
> + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
> vmcs12->vmx_preemption_timer_value =
> vmx_get_preemption_timer_value(vcpu);
> + }
>
> /*
> * In some cases (usually, nested EPT), L2 is allowed to change its
> @@ -5939,6 +5957,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>
> if (vmx->nested.mtf_pending)
> kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
> +
> + if (nested_cpu_has_preemption_timer(vmcs12)) {
> + kvm_state.flags |=
> + KVM_STATE_NESTED_PREEMPTION_TIMER;
> + kvm_state.size = offsetof(struct kvm_nested_state, data.vmx) +
> + offsetofend(struct kvm_vmx_nested_state_data, preemption_timer_deadline);
Hm, here we seem to drop all previous calculations for the
kvm_state.size (like if shadow vmcs was present or not). I think this
should just be
kvm_state.size += sizeof(user_vmx_nested_state.preemption_timer_deadline);
instead.
> + }
> }
> }
>
> @@ -5970,6 +5995,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>
> BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
> BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
> + BUILD_BUG_ON(sizeof(user_vmx_nested_state->preemption_timer_deadline)
> + != sizeof(vmx->nested.preemption_timer_deadline));
> +
>
> /*
> * Copy over the full allocated size of vmcs12 rather than just the size
> @@ -5985,6 +6013,12 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
> return -EFAULT;
> }
>
> + if (kvm_state.flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
> + if (put_user(vmx->nested.preemption_timer_deadline,
> + &user_vmx_nested_state->preemption_timer_deadline))
> + return -EFAULT;
> + }
> +
> out:
> return kvm_state.size;
> }
> @@ -6056,7 +6090,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
> */
> if (is_smm(vcpu) ?
> (kvm_state->flags &
> - (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
> + (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING |
> + KVM_STATE_NESTED_PREEMPTION_TIMER))
> : kvm_state->hdr.vmx.smm.flags)
> return -EINVAL;
>
> @@ -6146,6 +6181,20 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
> goto error_guest_mode;
> }
>
> + if (kvm_state->flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
> +
> + if (kvm_state->size <
> + offsetof(struct kvm_nested_state, data.vmx) +
> + offsetofend(struct kvm_vmx_nested_state_data, preemption_timer_deadline))
This also doesn't seem to be correct. E.g. what if there is no shadow
vmcs data in the saved state?
> + goto error_guest_mode;
> +
> + if (get_user(vmx->nested.preemption_timer_deadline,
> + &user_vmx_nested_state->preemption_timer_deadline)) {
> + ret = -EFAULT;
> + goto error_guest_mode;
> + }
> + }
> +
> if (nested_vmx_check_controls(vcpu, vmcs12) ||
> nested_vmx_check_host_state(vcpu, vmcs12) ||
> nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 298ddef79d009..db697400755fb 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -169,6 +169,7 @@ struct nested_vmx {
> u16 posted_intr_nv;
>
> struct hrtimer preemption_timer;
> + u64 preemption_timer_deadline;
> bool preemption_timer_expired;
>
> /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 471fccf7f8501..ba9e62ffbb4cd 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3418,6 +3418,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> case KVM_CAP_MSR_PLATFORM_INFO:
> case KVM_CAP_EXCEPTION_PAYLOAD:
> case KVM_CAP_SET_GUEST_DEBUG:
> + case KVM_CAP_NESTED_STATE_PREEMPTION_TIMER:
> r = 1;
> break;
> case KVM_CAP_SYNC_REGS:
> @@ -4626,7 +4627,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
>
> if (kvm_state.flags &
> ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE
> - | KVM_STATE_NESTED_EVMCS))
> + | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_PREEMPTION_TIMER))
> break;
>
> /* nested_run_pending implies guest_mode. */
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index ac9eba0289d1b..0868dce12a715 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1018,6 +1018,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_S390_PROTECTED 180
> #define KVM_CAP_PPC_SECURE_GUEST 181
> #define KVM_CAP_HALT_POLL 182
> +#define KVM_CAP_NESTED_STATE_PREEMPTION_TIMER 183
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
> index 3f3f780c8c650..60701178b9cc1 100644
> --- a/tools/arch/x86/include/uapi/asm/kvm.h
> +++ b/tools/arch/x86/include/uapi/asm/kvm.h
> @@ -391,6 +391,8 @@ struct kvm_sync_regs {
> #define KVM_STATE_NESTED_RUN_PENDING 0x00000002
> #define KVM_STATE_NESTED_EVMCS 0x00000004
> #define KVM_STATE_NESTED_MTF_PENDING 0x00000008
> +/* Available with KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */
> +#define KVM_STATE_NESTED_PREEMPTION_TIMER 0x00000010
>
> #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
> #define KVM_STATE_NESTED_SMM_VMXON 0x00000002
It seems tools/arch/x86/include/uapi/asm/kvm.h is usually updated
separately (not sure it's the right thing to do though..)
--
Vitaly
next prev parent reply other threads:[~2020-05-19 0:49 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-05-18 20:15 [PATCH 0/2] Fix VMX preemption timer migration Makarand Sonare
2020-05-18 20:15 ` [PATCH 1/2] KVM: nVMX: " Makarand Sonare
2020-05-19 0:49 ` Vitaly Kuznetsov [this message]
2020-05-19 7:18 ` Krish Sadhukhan
2020-05-18 20:16 ` [PATCH 2/2] KVM: selftests: VMX preemption timer migration test Makarand Sonare
2020-05-19 0:56 ` Vitaly Kuznetsov
2020-05-20 2:14 ` Krish Sadhukhan
2020-05-20 23:18 ` Makarand Sonare
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=875zcsvjdu.fsf@vitty.brq.redhat.com \
--to=vkuznets@redhat.com \
--cc=jmattson@google.com \
--cc=kvm@vger.kernel.org \
--cc=makarandsonare@google.com \
--cc=pshier@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.