From: Paolo Bonzini <pbonzini@redhat.com>
To: Sunil Muthuswamy <sunilmut@microsoft.com>,
Richard Henderson <rth@twiddle.net>,
Eduardo Habkost <ehabkost@redhat.com>
Cc: Stefan Weil <sw@weilnetz.de>,
"qemu-devel@nongnu.org" <qemu-devel@nongnu.org>
Subject: Re: PATCH] WHPX: TSC get and set should be dependent on VM state
Date: Fri, 28 Feb 2020 11:45:08 +0100 [thread overview]
Message-ID: <37e639a1-eb9f-b769-d61c-112d148bcff3@redhat.com> (raw)
In-Reply-To: <SN4PR2101MB08804D23439166E81FF151F7C0EA0@SN4PR2101MB0880.namprd21.prod.outlook.com>
On 26/02/20 21:54, Sunil Muthuswamy wrote:
> Currently, TSC is set as part of the VM runtime state. Setting TSC at
> runtime is heavy and additionally can have side effects on the guest,
> which are not very resilient to variances in the TSC. This patch uses
> the VM state to determine whether to set TSC or not. Some minor
> enhancements for getting TSC values as well that considers the VM state.
>
> Additionally, while setting the TSC, the partition is suspended to
> reduce the variance in the TSC value across vCPUs.
>
> Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Looks good. Do you want me to queue this until you can have your GPG
key signed? (And also, I can help you sign it of course).
Thanks,
> ---
> include/sysemu/whpx.h | 7 +++
> target/i386/whp-dispatch.h | 9 ++++
> target/i386/whpx-all.c | 103 +++++++++++++++++++++++++++++++++----
> 3 files changed, 110 insertions(+), 9 deletions(-)
>
> diff --git a/include/sysemu/whpx.h b/include/sysemu/whpx.h
> index 4794e8effe..a84b49e749 100644
> --- a/include/sysemu/whpx.h
> +++ b/include/sysemu/whpx.h
> @@ -35,4 +35,11 @@ int whpx_enabled(void);
>
> #endif /* CONFIG_WHPX */
>
> +/* state subset only touched by the VCPU itself during runtime */
> +#define WHPX_SET_RUNTIME_STATE 1
> +/* state subset modified during VCPU reset */
> +#define WHPX_SET_RESET_STATE 2
> +/* full state set, modified during initialization or on vmload */
> +#define WHPX_SET_FULL_STATE 3
> +
> #endif /* QEMU_WHPX_H */
> diff --git a/target/i386/whp-dispatch.h b/target/i386/whp-dispatch.h
> index 87d049ceab..e4695c349f 100644
> --- a/target/i386/whp-dispatch.h
> +++ b/target/i386/whp-dispatch.h
> @@ -23,6 +23,12 @@
> X(HRESULT, WHvGetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, WHV_REGISTER_VALUE* RegisterValues)) \
> X(HRESULT, WHvSetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, const WHV_REGISTER_VALUE* RegisterValues)) \
>
> +/*
> + * These are supplemental functions that may not be present
> + * on all versions and are not critical for basic functionality.
> + */
> +#define LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(X) \
> + X(HRESULT, WHvSuspendPartitionTime, (WHV_PARTITION_HANDLE Partition)) \
>
> #define LIST_WINHVEMULATION_FUNCTIONS(X) \
> X(HRESULT, WHvEmulatorCreateEmulator, (const WHV_EMULATOR_CALLBACKS* Callbacks, WHV_EMULATOR_HANDLE* Emulator)) \
> @@ -40,10 +46,12 @@
> /* Define function typedef */
> LIST_WINHVPLATFORM_FUNCTIONS(WHP_DEFINE_TYPE)
> LIST_WINHVEMULATION_FUNCTIONS(WHP_DEFINE_TYPE)
> +LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DEFINE_TYPE)
>
> struct WHPDispatch {
> LIST_WINHVPLATFORM_FUNCTIONS(WHP_DECLARE_MEMBER)
> LIST_WINHVEMULATION_FUNCTIONS(WHP_DECLARE_MEMBER)
> + LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DECLARE_MEMBER)
> };
>
> extern struct WHPDispatch whp_dispatch;
> @@ -53,6 +61,7 @@ bool init_whp_dispatch(void);
> typedef enum WHPFunctionList {
> WINHV_PLATFORM_FNS_DEFAULT,
> WINHV_EMULATION_FNS_DEFAULT,
> + WINHV_PLATFORM_FNS_SUPPLEMENTAL
> } WHPFunctionList;
>
> #endif /* WHP_DISPATCH_H */
> diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c
> index 35601b8176..6a885c0fb3 100644
> --- a/target/i386/whpx-all.c
> +++ b/target/i386/whpx-all.c
> @@ -114,7 +114,6 @@ static const WHV_REGISTER_NAME whpx_register_names[] = {
> WHvX64RegisterXmmControlStatus,
>
> /* X64 MSRs */
> - WHvX64RegisterTsc,
> WHvX64RegisterEfer,
> #ifdef TARGET_X86_64
> WHvX64RegisterKernelGsBase,
> @@ -215,7 +214,44 @@ static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
> return qs;
> }
>
> -static void whpx_set_registers(CPUState *cpu)
> +static int whpx_set_tsc(CPUState *cpu)
> +{
> + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
> + WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
> + WHV_REGISTER_VALUE tsc_val;
> + HRESULT hr;
> + struct whpx_state *whpx = &whpx_global;
> +
> + /*
> + * Suspend the partition prior to setting the TSC to reduce the variance
> + * in TSC across vCPUs. When the first vCPU runs post suspend, the
> + * partition is automatically resumed.
> + */
> + if (whp_dispatch.WHvSuspendPartitionTime) {
> +
> + /*
> + * Unable to suspend partition while setting TSC is not a fatal
> + * error. It just increases the likelihood of TSC variance between
> + * vCPUs and some guest OS are able to handle that just fine.
> + */
> + hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
> + if (FAILED(hr)) {
> + warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
> + }
> + }
> +
> + tsc_val.Reg64 = env->tsc;
> + hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
> + whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
> + if (FAILED(hr)) {
> + error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static void whpx_set_registers(CPUState *cpu, int level)
> {
> struct whpx_state *whpx = &whpx_global;
> struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
> @@ -230,6 +266,14 @@ static void whpx_set_registers(CPUState *cpu)
>
> assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
>
> + /*
> + * Following MSRs have side effects on the guest or are too heavy for
> + * runtime. Limit them to full state update.
> + */
> + if (level >= WHPX_SET_RESET_STATE) {
> + whpx_set_tsc(cpu);
> + }
> +
> memset(&vcxt, 0, sizeof(struct whpx_register_set));
>
> v86 = (env->eflags & VM_MASK);
> @@ -330,8 +374,6 @@ static void whpx_set_registers(CPUState *cpu)
> idx += 1;
>
> /* MSRs */
> - assert(whpx_register_names[idx] == WHvX64RegisterTsc);
> - vcxt.values[idx++].Reg64 = env->tsc;
> assert(whpx_register_names[idx] == WHvX64RegisterEfer);
> vcxt.values[idx++].Reg64 = env->efer;
> #ifdef TARGET_X86_64
> @@ -379,6 +421,25 @@ static void whpx_set_registers(CPUState *cpu)
> return;
> }
>
> +static int whpx_get_tsc(CPUState *cpu)
> +{
> + struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
> + WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
> + WHV_REGISTER_VALUE tsc_val;
> + HRESULT hr;
> + struct whpx_state *whpx = &whpx_global;
> +
> + hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
> + whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
> + if (FAILED(hr)) {
> + error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
> + return -1;
> + }
> +
> + env->tsc = tsc_val.Reg64;
> + return 0;
> +}
> +
> static void whpx_get_registers(CPUState *cpu)
> {
> struct whpx_state *whpx = &whpx_global;
> @@ -394,6 +455,11 @@ static void whpx_get_registers(CPUState *cpu)
>
> assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
>
> + if (!env->tsc_valid) {
> + whpx_get_tsc(cpu);
> + env->tsc_valid = !runstate_is_running();
> + }
> +
> hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
> whpx->partition, cpu->cpu_index,
> whpx_register_names,
> @@ -492,8 +558,6 @@ static void whpx_get_registers(CPUState *cpu)
> idx += 1;
>
> /* MSRs */
> - assert(whpx_register_names[idx] == WHvX64RegisterTsc);
> - env->tsc = vcxt.values[idx++].Reg64;
> assert(whpx_register_names[idx] == WHvX64RegisterEfer);
> env->efer = vcxt.values[idx++].Reg64;
> #ifdef TARGET_X86_64
> @@ -896,7 +960,7 @@ static int whpx_vcpu_run(CPUState *cpu)
>
> do {
> if (cpu->vcpu_dirty) {
> - whpx_set_registers(cpu);
> + whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
> cpu->vcpu_dirty = false;
> }
>
> @@ -1074,14 +1138,14 @@ static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
> static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
> run_on_cpu_data arg)
> {
> - whpx_set_registers(cpu);
> + whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
> cpu->vcpu_dirty = false;
> }
>
> static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
> run_on_cpu_data arg)
> {
> - whpx_set_registers(cpu);
> + whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
> cpu->vcpu_dirty = false;
> }
>
> @@ -1123,6 +1187,15 @@ void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
>
> static Error *whpx_migration_blocker;
>
> +static void whpx_cpu_update_state(void *opaque, int running, RunState state)
> +{
> + CPUX86State *env = opaque;
> +
> + if (running) {
> + env->tsc_valid = false;
> + }
> +}
> +
> int whpx_init_vcpu(CPUState *cpu)
> {
> HRESULT hr;
> @@ -1178,6 +1251,7 @@ int whpx_init_vcpu(CPUState *cpu)
>
> cpu->vcpu_dirty = true;
> cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
> + qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
>
> return 0;
> }
> @@ -1367,6 +1441,10 @@ static bool load_whp_dispatch_fns(HMODULE *handle,
>
> #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
> #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
> + #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
> + whp_dispatch.function_name = \
> + (function_name ## _t)GetProcAddress(hLib, #function_name); \
> +
> #define WHP_LOAD_FIELD(return_type, function_name, signature) \
> whp_dispatch.function_name = \
> (function_name ## _t)GetProcAddress(hLib, #function_name); \
> @@ -1394,6 +1472,11 @@ static bool load_whp_dispatch_fns(HMODULE *handle,
> WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
> LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
> break;
> +
> + case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
> + WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
> + LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
> + break;
> }
>
> *handle = hLib;
> @@ -1554,6 +1637,8 @@ bool init_whp_dispatch(void)
> goto error;
> }
>
> + assert(load_whp_dispatch_fns(&hWinHvPlatform,
> + WINHV_PLATFORM_FNS_SUPPLEMENTAL));
> whp_dispatch_initialized = true;
>
> return true;
>
next prev parent reply other threads:[~2020-02-28 10:45 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-26 20:54 PATCH] WHPX: TSC get and set should be dependent on VM state Sunil Muthuswamy
2020-02-28 10:45 ` Paolo Bonzini [this message]
2020-02-28 21:02 ` [EXTERNAL] " Sunil Muthuswamy
2020-02-29 9:39 ` Paolo Bonzini
2020-03-02 19:59 ` Sunil Muthuswamy
2020-03-03 17:52 ` Paolo Bonzini
2020-03-04 22:44 ` Sunil Muthuswamy
2020-03-04 22:47 ` Paolo Bonzini
2020-07-14 18:13 ` Sunil Muthuswamy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=37e639a1-eb9f-b769-d61c-112d148bcff3@redhat.com \
--to=pbonzini@redhat.com \
--cc=ehabkost@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
--cc=sunilmut@microsoft.com \
--cc=sw@weilnetz.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).