qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: Sunil Muthuswamy <sunilmut@microsoft.com>,
	Richard Henderson <rth@twiddle.net>,
	Eduardo Habkost <ehabkost@redhat.com>
Cc: Stefan Weil <sw@weilnetz.de>,
	"qemu-devel@nongnu.org" <qemu-devel@nongnu.org>
Subject: Re: PATCH] WHPX: TSC get and set should be dependent on VM state
Date: Fri, 28 Feb 2020 11:45:08 +0100	[thread overview]
Message-ID: <37e639a1-eb9f-b769-d61c-112d148bcff3@redhat.com> (raw)
In-Reply-To: <SN4PR2101MB08804D23439166E81FF151F7C0EA0@SN4PR2101MB0880.namprd21.prod.outlook.com>

On 26/02/20 21:54, Sunil Muthuswamy wrote:
> Currently, TSC is set as part of the VM runtime state. Setting TSC at
> runtime is heavy and additionally can have side effects on the guest,
> which are not very resilient to variances in the TSC. This patch uses
> the VM state to determine whether to set TSC or not. Some minor
> enhancements for getting TSC values as well that considers the VM state.
> 
> Additionally, while setting the TSC, the partition is suspended to
> reduce the variance in the TSC value across vCPUs.
> 
> Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>

Looks good.  Do you want me to queue this until you can have your GPG
key signed?  (And also, I can help you sign it of course).

Thanks,

> ---
>  include/sysemu/whpx.h      |   7 +++
>  target/i386/whp-dispatch.h |   9 ++++
>  target/i386/whpx-all.c     | 103 +++++++++++++++++++++++++++++++++----
>  3 files changed, 110 insertions(+), 9 deletions(-)
> 
> diff --git a/include/sysemu/whpx.h b/include/sysemu/whpx.h
> index 4794e8effe..a84b49e749 100644
> --- a/include/sysemu/whpx.h
> +++ b/include/sysemu/whpx.h
> @@ -35,4 +35,11 @@ int whpx_enabled(void);
>  
>  #endif /* CONFIG_WHPX */
>  
> +/* state subset only touched by the VCPU itself during runtime */
> +#define WHPX_SET_RUNTIME_STATE   1
> +/* state subset modified during VCPU reset */
> +#define WHPX_SET_RESET_STATE     2
> +/* full state set, modified during initialization or on vmload */
> +#define WHPX_SET_FULL_STATE      3
> +
>  #endif /* QEMU_WHPX_H */
> diff --git a/target/i386/whp-dispatch.h b/target/i386/whp-dispatch.h
> index 87d049ceab..e4695c349f 100644
> --- a/target/i386/whp-dispatch.h
> +++ b/target/i386/whp-dispatch.h
> @@ -23,6 +23,12 @@
>    X(HRESULT, WHvGetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, WHV_REGISTER_VALUE* RegisterValues)) \
>    X(HRESULT, WHvSetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, const WHV_REGISTER_VALUE* RegisterValues)) \
>  
> +/*
> + * These are supplemental functions that may not be present
> + * on all versions and are not critical for basic functionality.
> + */
> +#define LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(X) \
> +  X(HRESULT, WHvSuspendPartitionTime, (WHV_PARTITION_HANDLE Partition)) \
>  
>  #define LIST_WINHVEMULATION_FUNCTIONS(X) \
>    X(HRESULT, WHvEmulatorCreateEmulator, (const WHV_EMULATOR_CALLBACKS* Callbacks, WHV_EMULATOR_HANDLE* Emulator)) \
> @@ -40,10 +46,12 @@
>  /* Define function typedef */
>  LIST_WINHVPLATFORM_FUNCTIONS(WHP_DEFINE_TYPE)
>  LIST_WINHVEMULATION_FUNCTIONS(WHP_DEFINE_TYPE)
> +LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DEFINE_TYPE)
>  
>  struct WHPDispatch {
>      LIST_WINHVPLATFORM_FUNCTIONS(WHP_DECLARE_MEMBER)
>      LIST_WINHVEMULATION_FUNCTIONS(WHP_DECLARE_MEMBER)
> +    LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DECLARE_MEMBER)
>  };
>  
>  extern struct WHPDispatch whp_dispatch;
> @@ -53,6 +61,7 @@ bool init_whp_dispatch(void);
>  typedef enum WHPFunctionList {
>      WINHV_PLATFORM_FNS_DEFAULT,
>      WINHV_EMULATION_FNS_DEFAULT,
> +    WINHV_PLATFORM_FNS_SUPPLEMENTAL
>  } WHPFunctionList;
>  
>  #endif /* WHP_DISPATCH_H */
> diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c
> index 35601b8176..6a885c0fb3 100644
> --- a/target/i386/whpx-all.c
> +++ b/target/i386/whpx-all.c
> @@ -114,7 +114,6 @@ static const WHV_REGISTER_NAME whpx_register_names[] = {
>      WHvX64RegisterXmmControlStatus,
>  
>      /* X64 MSRs */
> -    WHvX64RegisterTsc,
>      WHvX64RegisterEfer,
>  #ifdef TARGET_X86_64
>      WHvX64RegisterKernelGsBase,
> @@ -215,7 +214,44 @@ static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
>      return qs;
>  }
>  
> -static void whpx_set_registers(CPUState *cpu)
> +static int whpx_set_tsc(CPUState *cpu)
> +{
> +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
> +    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
> +    WHV_REGISTER_VALUE tsc_val;
> +    HRESULT hr;
> +    struct whpx_state *whpx = &whpx_global;
> +
> +    /*
> +     * Suspend the partition prior to setting the TSC to reduce the variance
> +     * in TSC across vCPUs. When the first vCPU runs post suspend, the
> +     * partition is automatically resumed.
> +     */
> +    if (whp_dispatch.WHvSuspendPartitionTime) {
> +
> +        /*
> +         * Unable to suspend partition while setting TSC is not a fatal
> +         * error. It just increases the likelihood of TSC variance between
> +         * vCPUs and some guest OS are able to handle that just fine.
> +         */
> +        hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
> +        if (FAILED(hr)) {
> +            warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
> +        }
> +    }
> +
> +    tsc_val.Reg64 = env->tsc;
> +    hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
> +        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
> +    if (FAILED(hr)) {
> +        error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static void whpx_set_registers(CPUState *cpu, int level)
>  {
>      struct whpx_state *whpx = &whpx_global;
>      struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
> @@ -230,6 +266,14 @@ static void whpx_set_registers(CPUState *cpu)
>  
>      assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
>  
> +    /*
> +     * Following MSRs have side effects on the guest or are too heavy for
> +     * runtime. Limit them to full state update.
> +     */
> +    if (level >= WHPX_SET_RESET_STATE) {
> +        whpx_set_tsc(cpu);
> +    }
> +
>      memset(&vcxt, 0, sizeof(struct whpx_register_set));
>  
>      v86 = (env->eflags & VM_MASK);
> @@ -330,8 +374,6 @@ static void whpx_set_registers(CPUState *cpu)
>      idx += 1;
>  
>      /* MSRs */
> -    assert(whpx_register_names[idx] == WHvX64RegisterTsc);
> -    vcxt.values[idx++].Reg64 = env->tsc;
>      assert(whpx_register_names[idx] == WHvX64RegisterEfer);
>      vcxt.values[idx++].Reg64 = env->efer;
>  #ifdef TARGET_X86_64
> @@ -379,6 +421,25 @@ static void whpx_set_registers(CPUState *cpu)
>      return;
>  }
>  
> +static int whpx_get_tsc(CPUState *cpu)
> +{
> +    struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
> +    WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
> +    WHV_REGISTER_VALUE tsc_val;
> +    HRESULT hr;
> +    struct whpx_state *whpx = &whpx_global;
> +
> +    hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
> +        whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
> +    if (FAILED(hr)) {
> +        error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
> +        return -1;
> +    }
> +
> +    env->tsc = tsc_val.Reg64;
> +    return 0;
> +}
> +
>  static void whpx_get_registers(CPUState *cpu)
>  {
>      struct whpx_state *whpx = &whpx_global;
> @@ -394,6 +455,11 @@ static void whpx_get_registers(CPUState *cpu)
>  
>      assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
>  
> +    if (!env->tsc_valid) {
> +        whpx_get_tsc(cpu);
> +        env->tsc_valid = !runstate_is_running();
> +    }
> +
>      hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
>          whpx->partition, cpu->cpu_index,
>          whpx_register_names,
> @@ -492,8 +558,6 @@ static void whpx_get_registers(CPUState *cpu)
>      idx += 1;
>  
>      /* MSRs */
> -    assert(whpx_register_names[idx] == WHvX64RegisterTsc);
> -    env->tsc = vcxt.values[idx++].Reg64;
>      assert(whpx_register_names[idx] == WHvX64RegisterEfer);
>      env->efer = vcxt.values[idx++].Reg64;
>  #ifdef TARGET_X86_64
> @@ -896,7 +960,7 @@ static int whpx_vcpu_run(CPUState *cpu)
>  
>      do {
>          if (cpu->vcpu_dirty) {
> -            whpx_set_registers(cpu);
> +            whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
>              cpu->vcpu_dirty = false;
>          }
>  
> @@ -1074,14 +1138,14 @@ static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
>  static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
>                                                 run_on_cpu_data arg)
>  {
> -    whpx_set_registers(cpu);
> +    whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
>      cpu->vcpu_dirty = false;
>  }
>  
>  static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
>                                                run_on_cpu_data arg)
>  {
> -    whpx_set_registers(cpu);
> +    whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
>      cpu->vcpu_dirty = false;
>  }
>  
> @@ -1123,6 +1187,15 @@ void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
>  
>  static Error *whpx_migration_blocker;
>  
> +static void whpx_cpu_update_state(void *opaque, int running, RunState state)
> +{
> +    CPUX86State *env = opaque;
> +
> +    if (running) {
> +        env->tsc_valid = false;
> +    }
> +}
> +
>  int whpx_init_vcpu(CPUState *cpu)
>  {
>      HRESULT hr;
> @@ -1178,6 +1251,7 @@ int whpx_init_vcpu(CPUState *cpu)
>  
>      cpu->vcpu_dirty = true;
>      cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
> +    qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
>  
>      return 0;
>  }
> @@ -1367,6 +1441,10 @@ static bool load_whp_dispatch_fns(HMODULE *handle,
>  
>      #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
>      #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
> +    #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
> +        whp_dispatch.function_name = \
> +            (function_name ## _t)GetProcAddress(hLib, #function_name); \
> +
>      #define WHP_LOAD_FIELD(return_type, function_name, signature) \
>          whp_dispatch.function_name = \
>              (function_name ## _t)GetProcAddress(hLib, #function_name); \
> @@ -1394,6 +1472,11 @@ static bool load_whp_dispatch_fns(HMODULE *handle,
>          WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
>          LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
>          break;
> +
> +    case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
> +        WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
> +        LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
> +        break;
>      }
>  
>      *handle = hLib;
> @@ -1554,6 +1637,8 @@ bool init_whp_dispatch(void)
>          goto error;
>      }
>  
> +    assert(load_whp_dispatch_fns(&hWinHvPlatform,
> +        WINHV_PLATFORM_FNS_SUPPLEMENTAL));
>      whp_dispatch_initialized = true;
>  
>      return true;
> 



  reply	other threads:[~2020-02-28 10:45 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-26 20:54 PATCH] WHPX: TSC get and set should be dependent on VM state Sunil Muthuswamy
2020-02-28 10:45 ` Paolo Bonzini [this message]
2020-02-28 21:02   ` [EXTERNAL] " Sunil Muthuswamy
2020-02-29  9:39     ` Paolo Bonzini
2020-03-02 19:59       ` Sunil Muthuswamy
2020-03-03 17:52         ` Paolo Bonzini
2020-03-04 22:44           ` Sunil Muthuswamy
2020-03-04 22:47             ` Paolo Bonzini
2020-07-14 18:13               ` Sunil Muthuswamy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=37e639a1-eb9f-b769-d61c-112d148bcff3@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=ehabkost@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    --cc=sunilmut@microsoft.com \
    --cc=sw@weilnetz.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).