* [kvm PATCH] KVM: nVMX - enable VMX preemption timer migration
@ 2020-04-22 20:50 Makarand Sonare
2020-04-23 15:33 ` Sean Christopherson
0 siblings, 1 reply; 3+ messages in thread
From: Makarand Sonare @ 2020-04-22 20:50 UTC (permalink / raw)
To: kvm, pshier, jmattson; +Cc: Makarand Sonare
From: Peter Shier <pshier@google.com>
Add new field to hold preemption timer remaining until expiration
appended to struct kvm_vmx_nested_state_data. This is to prevent
the second (and later) VM-Enter after migration from restarting the timer
with wrong value. KVM_SET_NESTED_STATE restarts timer using migrated
state regardless of whether L1 sets VM_EXIT_SAVE_VMX_PREEMPTION_TIMER.
Fixes: cf8b84f48a593 ("kvm: nVMX: Prepare for checkpointing L2 state")
Signed-off-by: Peter Shier <pshier@google.com>
Signed-off-by: Makarand Sonare <makarandsonare@google.com>
Change-Id: I6446aba5a547afa667f0ef4620b1b76115bf3753
---
Documentation/virt/kvm/api.rst | 4 ++
arch/x86/include/uapi/asm/kvm.h | 2 +
arch/x86/kvm/vmx/nested.c | 59 +++++++++++++++++++++++----
arch/x86/kvm/vmx/vmx.h | 1 +
arch/x86/kvm/x86.c | 1 +
include/uapi/linux/kvm.h | 1 +
tools/arch/x86/include/uapi/asm/kvm.h | 2 +
7 files changed, 63 insertions(+), 7 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index efbbe570aa9b7..89415f20fd089 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4326,6 +4326,9 @@ Errors:
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
+ /* Available with KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */
+ #define KVM_STATE_NESTED_PREEMPTION_TIMER 0x00000010
+
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
@@ -4346,6 +4349,7 @@ Errors:
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+ __u32 preemption_timer_remaining;
};
This ioctl copies the vcpu's nested virtualization state from the kernel to
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 3f3f780c8c650..abaddea6f8ff4 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -391,6 +391,7 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
+#define KVM_STATE_NESTED_PREEMPTION_TIMER 0x00000010
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
@@ -400,6 +401,7 @@ struct kvm_sync_regs {
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+ __u32 preemption_timer_remaining;
};
struct kvm_vmx_nested_state_hdr {
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index cbc9ea2de28f9..a5207df73f015 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2014,15 +2014,16 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
container_of(timer, struct vcpu_vmx, nested.preemption_timer);
vmx->nested.preemption_timer_expired = true;
+ vmx->nested.preemption_timer_remaining = 0;
kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
kvm_vcpu_kick(&vmx->vcpu);
return HRTIMER_NORESTART;
}
-static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
+static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
+ u64 preemption_timeout)
{
- u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
@@ -3293,8 +3294,15 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
* the timer.
*/
vmx->nested.preemption_timer_expired = false;
- if (nested_cpu_has_preemption_timer(vmcs12))
- vmx_start_preemption_timer(vcpu);
+ if (nested_cpu_has_preemption_timer(vmcs12)) {
+ u64 timer_value;
+
+ if (from_vmentry)
+ timer_value = vmcs12->vmx_preemption_timer_value;
+ else
+ timer_value = vmx->nested.preemption_timer_remaining;
+ vmx_start_preemption_timer(vcpu, timer_value);
+ }
/*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
@@ -3889,9 +3897,13 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
if (nested_cpu_has_preemption_timer(vmcs12) &&
- vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+ !vmx->nested.nested_run_pending) {
+ vmx->nested.preemption_timer_remaining =
+ vmx_get_preemption_timer_value(vcpu);
+ if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
vmcs12->vmx_preemption_timer_value =
- vmx_get_preemption_timer_value(vcpu);
+ vmx->nested.preemption_timer_remaining;
+ }
/*
* In some cases (usually, nested EPT), L2 is allowed to change its
@@ -5759,6 +5771,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (vmx->nested.mtf_pending)
kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
+
+ if (nested_cpu_has_preemption_timer(vmcs12)) {
+ kvm_state.flags |=
+ KVM_STATE_NESTED_PREEMPTION_TIMER;
+ kvm_state.size +=
+ sizeof(user_vmx_nested_state->preemption_timer_remaining);
+ }
}
}
@@ -5790,6 +5809,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
+ BUILD_BUG_ON(sizeof(user_vmx_nested_state->preemption_timer_remaining)
+ != sizeof(vmx->nested.preemption_timer_remaining));
+
/*
* Copy over the full allocated size of vmcs12 rather than just the size
@@ -5805,6 +5827,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
return -EFAULT;
}
+ if (kvm_state.flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
+ if (copy_to_user(&user_vmx_nested_state->preemption_timer_remaining,
+ &vmx->nested.preemption_timer_remaining,
+ sizeof(vmx->nested.preemption_timer_remaining)))
+ return -EFAULT;
+ }
+
out:
return kvm_state.size;
}
@@ -5876,7 +5905,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
*/
if (is_smm(vcpu) ?
(kvm_state->flags &
- (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
+ (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING |
+ KVM_STATE_NESTED_PREEMPTION_TIMER))
: kvm_state->hdr.vmx.smm.flags)
return -EINVAL;
@@ -5966,6 +5996,21 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
goto error_guest_mode;
}
+ if (kvm_state->flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
+
+ if (kvm_state->size <
+ offsetof(struct kvm_nested_state, hdr.vmx) +
+ offsetofend(struct kvm_vmx_nested_state_data, preemption_timer_remaining))
+ goto error_guest_mode;
+
+ if (copy_from_user(&vmx->nested.preemption_timer_remaining,
+ &user_vmx_nested_state->preemption_timer_remaining,
+ sizeof(user_vmx_nested_state->preemption_timer_remaining))) {
+ ret = -EFAULT;
+ goto error_guest_mode;
+ }
+ }
+
if (nested_vmx_check_controls(vcpu, vmcs12) ||
nested_vmx_check_host_state(vcpu, vmcs12) ||
nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index aab9df55336ef..0098c7dc2e254 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -167,6 +167,7 @@ struct nested_vmx {
u16 posted_intr_nv;
struct hrtimer preemption_timer;
+ u32 preemption_timer_remaining;
bool preemption_timer_expired;
/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 027dfd278a973..c9758ca1b5714 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3374,6 +3374,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
+ case KVM_CAP_NESTED_STATE_PREEMPTION_TIMER:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 428c7dde6b4b3..4d6fc8fe30388 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1017,6 +1017,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_VCPU_RESETS 179
#define KVM_CAP_S390_PROTECTED 180
#define KVM_CAP_PPC_SECURE_GUEST 181
+#define KVM_CAP_NESTED_STATE_PREEMPTION_TIMER 182
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 3f3f780c8c650..60701178b9cc1 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -391,6 +391,8 @@ struct kvm_sync_regs {
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
#define KVM_STATE_NESTED_EVMCS 0x00000004
#define KVM_STATE_NESTED_MTF_PENDING 0x00000008
+/* Available with KVM_CAP_NESTED_STATE_PREEMPTION_TIMER */
+#define KVM_STATE_NESTED_PREEMPTION_TIMER 0x00000010
#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
--
2.26.1.301.g55bc3eb7cb9-goog
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [kvm PATCH] KVM: nVMX - enable VMX preemption timer migration
2020-04-22 20:50 [kvm PATCH] KVM: nVMX - enable VMX preemption timer migration Makarand Sonare
@ 2020-04-23 15:33 ` Sean Christopherson
[not found] ` <CA+qz5spBA0HiXbvtZ-7bvfvf20HbfVaV0UrXF=NOGMA1ptuTHg@mail.gmail.com>
0 siblings, 1 reply; 3+ messages in thread
From: Sean Christopherson @ 2020-04-23 15:33 UTC (permalink / raw)
To: Makarand Sonare; +Cc: kvm, pshier, jmattson
A few comments on the shortlog:
- Use [PATCH] instead of [kvm PATCH], the "kvm" part is redundant with
the subsystem scope. I almost skipped over this patch because my eyes
have been trained to treat [kvm... PATCH] as patches for kvm-unit-tests.
- Use a colon instead of a dash after nVMX.
- The patch should be tagged v2. Again, I almost glossed over this
because I thought it was a resend of v1.
- I wouldn't phrase this as enabling, e.g. nothing prevented migrating
the preemption timer before this patch, it just wasn't migrated
correctly.
E.g.
[PATCH v2] KVM: nVMX: Fix VMX preemption timer migration
On Wed, Apr 22, 2020 at 01:50:30PM -0700, Makarand Sonare wrote:
> From: Peter Shier <pshier@google.com>
>
> Add new field to hold preemption timer remaining until expiration
> appended to struct kvm_vmx_nested_state_data. This is to prevent
> the second (and later) VM-Enter after migration from restarting the timer
This isn't correct. The bug being fixed is that the _first_ VM-Enter after
migration would use the incorrect value, i.e. full value instead of
partially decayed timer. My comment in v1 was that the changelog should
document why it adds a new field to fix the bug as opposed to simply
snapshotting the decayed timer.
> with wrong value. KVM_SET_NESTED_STATE restarts timer using migrated
> state regardless of whether L1 sets VM_EXIT_SAVE_VMX_PREEMPTION_TIMER.
Any plans to enhance the vmx_set_nested_state_test.c to verify this works
as intended?
> struct kvm_vmx_nested_state_hdr {
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index cbc9ea2de28f9..a5207df73f015 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -2014,15 +2014,16 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
> container_of(timer, struct vcpu_vmx, nested.preemption_timer);
>
> vmx->nested.preemption_timer_expired = true;
> + vmx->nested.preemption_timer_remaining = 0;
Won't this get overwritten by sync_vmcs02_to_vmcs12()? Unless there is a
corner cases I'm missing, I think it'd be better to omit this so that it's
clear that sync_vmcs02_to_vmcs12() is responsible for snapshotting the
remaining time.
> kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
> kvm_vcpu_kick(&vmx->vcpu);
>
> return HRTIMER_NORESTART;
> }
...
> @@ -5790,6 +5809,9 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
>
> BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
> BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
> + BUILD_BUG_ON(sizeof(user_vmx_nested_state->preemption_timer_remaining)
> + != sizeof(vmx->nested.preemption_timer_remaining));
> +
>
> /*
> * Copy over the full allocated size of vmcs12 rather than just the size
> @@ -5805,6 +5827,13 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
> return -EFAULT;
> }
>
> + if (kvm_state.flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
> + if (copy_to_user(&user_vmx_nested_state->preemption_timer_remaining,
> + &vmx->nested.preemption_timer_remaining,
> + sizeof(vmx->nested.preemption_timer_remaining)))
Build tested only, but {get,put}_user() compiles just fine, as requested.
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a2cd413f2901..b2ec62a24f0c 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5970,9 +5970,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
- BUILD_BUG_ON(sizeof(user_vmx_nested_state->preemption_timer_remaining)
- != sizeof(vmx->nested.preemption_timer_remaining));
-
/*
* Copy over the full allocated size of vmcs12 rather than just the size
@@ -5989,9 +5986,8 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
}
if (kvm_state.flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
- if (copy_to_user(&user_vmx_nested_state->preemption_timer_remaining,
- &vmx->nested.preemption_timer_remaining,
- sizeof(vmx->nested.preemption_timer_remaining)))
+ if (put_user(vmx->nested.preemption_timer_remaining,
+ &user_vmx_nested_state->preemption_timer_remaining))
return -EFAULT;
}
@@ -6164,9 +6160,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
offsetofend(struct kvm_vmx_nested_state_data, preemption_timer_remaining))
goto error_guest_mode;
- if (copy_from_user(&vmx->nested.preemption_timer_remaining,
- &user_vmx_nested_state->preemption_timer_remaining,
- sizeof(user_vmx_nested_state->preemption_timer_remaining))) {
+ if (get_user(vmx->nested.preemption_timer_remaining,
+ &user_vmx_nested_state->preemption_timer_remaining)) {
ret = -EFAULT;
goto error_guest_mode;
}
> + return -EFAULT;
> + }
> +
> out:
> return kvm_state.size;
> }
> @@ -5876,7 +5905,8 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
> */
> if (is_smm(vcpu) ?
> (kvm_state->flags &
> - (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
> + (KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING |
> + KVM_STATE_NESTED_PREEMPTION_TIMER))
> : kvm_state->hdr.vmx.smm.flags)
> return -EINVAL;
>
> @@ -5966,6 +5996,21 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
> goto error_guest_mode;
> }
>
> + if (kvm_state->flags & KVM_STATE_NESTED_PREEMPTION_TIMER) {
> +
> + if (kvm_state->size <
> + offsetof(struct kvm_nested_state, hdr.vmx) +
> + offsetofend(struct kvm_vmx_nested_state_data, preemption_timer_remaining))
Too many spaces after 'struct'.
> + goto error_guest_mode;
> +
> + if (copy_from_user(&vmx->nested.preemption_timer_remaining,
> + &user_vmx_nested_state->preemption_timer_remaining,
> + sizeof(user_vmx_nested_state->preemption_timer_remaining))) {
> + ret = -EFAULT;
> + goto error_guest_mode;
> + }
> + }
> +
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [kvm PATCH] KVM: nVMX - enable VMX preemption timer migration
[not found] ` <CA+qz5spBA0HiXbvtZ-7bvfvf20HbfVaV0UrXF=NOGMA1ptuTHg@mail.gmail.com>
@ 2020-04-23 19:58 ` Sean Christopherson
0 siblings, 0 replies; 3+ messages in thread
From: Sean Christopherson @ 2020-04-23 19:58 UTC (permalink / raw)
To: Makarand Sonare; +Cc: kvm, Peter Shier, Jim Mattson
Please read through Thomas' diatribe^W comments on top-posting and HTML
email[*]. I appreciate that it can be frustrating to configure and get
used to, especially in a corporate environment, but it really does help
get your code upstreamed.
https://people.kernel.org/tglx/notes-about-netiquette
On Thu, Apr 23, 2020 at 11:48:34AM -0700, Makarand Sonare wrote:
> Thanks for the feedback!
> *>>Any plans to enhance the vmx_set_nested_state_test.c to verify this
> works as intended?*
> I am working on writing the test as part of state_test.c as that test
> already has the nested state save/restore logic.
>
> *>>Build tested only, but {get,put}_user() compiles just fine, as
> requested.*
> The impacted functions are using copy_to_user/copy_from_user already so I
> used the same for consistency.
That's because they're copying complex structures that don't fit in a
single memory access.
> I will send a v3 PATCH after incorporating rest of the feedback.
In the future, please give folks a chance to digest and respond before
spinning another version, especially when there is disagreement over which
direction to take. E.g. the above {get,put}_user() thing is easy to sort
out with a few back-and-forth mails. Sending v3 prematurely (and v2 for
that matter) means that closing that discussion requires tracking down the
new version and providing the necessary context. Which, as an aside, is
why trimming mails and bottom-posting is helpful as it allows the scope of
the conversation to be narrowed to the remaining opens.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-04-23 19:58 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-22 20:50 [kvm PATCH] KVM: nVMX - enable VMX preemption timer migration Makarand Sonare
2020-04-23 15:33 ` Sean Christopherson
[not found] ` <CA+qz5spBA0HiXbvtZ-7bvfvf20HbfVaV0UrXF=NOGMA1ptuTHg@mail.gmail.com>
2020-04-23 19:58 ` Sean Christopherson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).