From: Maxim Levitsky <mlevitsk@redhat.com>
To: qemu-devel@nongnu.org
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
Eduardo Habkost <ehabkost@redhat.com>,
kvm@vger.kernel.org, "Michael S. Tsirkin" <mst@redhat.com>,
Cornelia Huck <cohuck@redhat.com>,
Richard Henderson <richard.henderson@linaro.org>,
Maxim Levitsky <mlevitsk@redhat.com>,
Paolo Bonzini <pbonzini@redhat.com>
Subject: [PATCH 2/2] Implement support for precise TSC migration
Date: Mon, 30 Nov 2020 15:38:45 +0200
Message-ID: <20201130133845.233552-3-mlevitsk@redhat.com>
In-Reply-To: <20201130133845.233552-1-mlevitsk@redhat.com>
Implement support for the KVM_GET_TSC_STATE/KVM_SET_TSC_STATE ioctls,
which read and write the guest TSC together with the host nanosecond
timestamp at which it was sampled, so that the TSC value can be
migrated precisely. Currently, to enable it you need to set
x-precise-tsc=on for each vCPU.
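For example, a hypothetical invocation (only the x-precise-tsc property
comes from this patch; the rest of the command line is a placeholder):

    qemu-system-x86_64 -accel kvm -cpu host,x-precise-tsc=on ...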
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
target/i386/cpu.c | 1 +
target/i386/cpu.h | 4 ++
target/i386/kvm.c | 141 ++++++++++++++++++++++++++++++++++--------
target/i386/machine.c | 20 ++++++
4 files changed, 139 insertions(+), 27 deletions(-)
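For reference, a sketch of the kvm_tsc_state UAPI structure as this
patch uses it (field names are taken from the accesses in the diff
below; the actual layout, ordering and padding come from the kernel
header update in patch 1/2 and are assumptions here):

    struct kvm_tsc_state {
            __u32 flags;      /* e.g. KVM_TSC_STATE_TSC_ADJUST_VALID */
            __u64 nsec;       /* host clock timestamp, in nanoseconds */
            __u64 tsc;        /* guest TSC value at that timestamp */
            __u64 tsc_adjust; /* IA32_TSC_ADJUST; valid iff the flag is set */
    };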
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 5a8c96072e..3c82864930 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -7231,6 +7231,7 @@ static Property x86_cpu_properties[] = {
false),
DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level,
true),
+ DEFINE_PROP_BOOL("x-precise-tsc", X86CPU, precise_tsc, false),
DEFINE_PROP_END_OF_LIST()
};
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 88e8586f8f..fd355057b8 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1460,6 +1460,7 @@ typedef struct CPUX86State {
uint64_t tsc_adjust;
uint64_t tsc_deadline;
uint64_t tsc_aux;
+ uint64_t tsc_ns_timestamp;
uint64_t xcr0;
@@ -1743,6 +1744,9 @@ struct X86CPU {
/* Number of physical address bits supported */
uint32_t phys_bits;
+ /* Use KVM_GET_TSC_STATE/KVM_SET_TSC_STATE to access IA32_TSC */
+ bool precise_tsc;
+
/* in order to simplify APIC support, we leave this pointer to the
user */
struct DeviceState *apic_state;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index a2934dda02..f0488aa6cc 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -121,6 +121,7 @@ static int has_xsave;
static int has_xcrs;
static int has_pit_state2;
static int has_exception_payload;
+static int has_precise_tsc;
static bool has_msr_mcg_ext_ctl;
@@ -196,31 +197,109 @@ static int kvm_get_tsc(CPUState *cs)
{
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
- struct {
- struct kvm_msrs info;
- struct kvm_msr_entry entries[1];
- } msr_data = {};
int ret;
if (env->tsc_valid) {
return 0;
}
- memset(&msr_data, 0, sizeof(msr_data));
- msr_data.info.nmsrs = 1;
- msr_data.entries[0].index = MSR_IA32_TSC;
- env->tsc_valid = !runstate_is_running();
+ if (cpu->precise_tsc) {
+ struct kvm_tsc_state tsc_state;
- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
- if (ret < 0) {
- return ret;
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_TSC_STATE, &tsc_state);
+ if (ret < 0) {
+ return ret;
+ }
+
+ env->tsc = tsc_state.tsc;
+ if (tsc_state.flags & KVM_TSC_STATE_TSC_ADJUST_VALID) {
+ env->tsc_adjust = tsc_state.tsc_adjust;
+ }
+ env->tsc_ns_timestamp = tsc_state.nsec;
+
+ } else {
+ struct {
+ struct kvm_msrs info;
+ struct kvm_msr_entry entries[2];
+ } msr_data = {};
+
+ memset(&msr_data, 0, sizeof(msr_data));
+ msr_data.info.nmsrs = 1;
+ msr_data.entries[0].index = MSR_IA32_TSC;
+
+ if (has_msr_tsc_adjust) {
+ msr_data.info.nmsrs++;
+ msr_data.entries[1].index = MSR_TSC_ADJUST;
+ }
+
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
+ if (ret < 0) {
+ return ret;
+ }
+
+ assert(ret == msr_data.info.nmsrs);
+
+ env->tsc = msr_data.entries[0].data;
+ if (has_msr_tsc_adjust) {
+ env->tsc_adjust = msr_data.entries[1].data;
+ }
}
- assert(ret == 1);
- env->tsc = msr_data.entries[0].data;
+ env->tsc_valid = !runstate_is_running();
return 0;
}
+static int kvm_set_tsc(CPUState *cs)
+{
+ int ret;
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+
+ if (cpu->precise_tsc) {
+ struct kvm_tsc_state tsc_state;
+
+ memset(&tsc_state, 0, sizeof(tsc_state));
+
+ tsc_state.tsc = env->tsc;
+ tsc_state.nsec = env->tsc_ns_timestamp;
+
+ if (has_msr_tsc_adjust) {
+ tsc_state.tsc_adjust = env->tsc_adjust;
+ tsc_state.flags |= KVM_TSC_STATE_TSC_ADJUST_VALID;
+ }
+
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_TSC_STATE, &tsc_state);
+ if (ret < 0) {
+ return ret;
+ }
+
+ } else {
+ struct {
+ struct kvm_msrs info;
+ struct kvm_msr_entry entries[2];
+ } msr_data = {};
+
+ memset(&msr_data, 0, sizeof(msr_data));
+ msr_data.info.nmsrs = 1;
+ msr_data.entries[0].index = MSR_IA32_TSC;
+ msr_data.entries[0].data = env->tsc;
+
+ if (has_msr_tsc_adjust) {
+ msr_data.info.nmsrs++;
+ msr_data.entries[1].index = MSR_TSC_ADJUST;
+ msr_data.entries[1].data = env->tsc_adjust;
+ }
+
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
+ if (ret < 0) {
+ return ret;
+ }
+
+ assert(ret == msr_data.info.nmsrs);
+ }
+ return ret;
+}
+
static inline void do_kvm_synchronize_tsc(CPUState *cpu, run_on_cpu_data arg)
{
kvm_get_tsc(cpu);
@@ -1780,6 +1859,13 @@ int kvm_arch_init_vcpu(CPUState *cs)
}
}
+ if (cpu->precise_tsc) {
+ if (!kvm_check_extension(cs->kvm_state, KVM_CAP_PRECISE_TSC)) {
+ error_report("kvm: Precise TSC is not supported by the host's KVM");
+ return -ENOTSUP;
+ }
+ }
+
if (cpu->vmware_cpuid_freq
/* Guests depend on 0x40000000 to detect this feature, so only expose
* it if KVM exposes leaf 0x40000000. (Conflicts with Hyper-V) */
@@ -2196,6 +2282,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
int disable_exits = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS);
int ret;
+
+
/* Work around for kernel header with a typo. TODO: fix header and drop. */
#if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT)
#define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL
@@ -2215,6 +2303,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
}
}
+ has_precise_tsc = kvm_check_extension(s, KVM_CAP_PRECISE_TSC);
+
return 0;
}
@@ -2756,9 +2846,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
if (has_msr_tsc_aux) {
kvm_msr_entry_add(cpu, MSR_TSC_AUX, env->tsc_aux);
}
- if (has_msr_tsc_adjust) {
- kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, env->tsc_adjust);
- }
if (has_msr_misc_enable) {
kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE,
env->msr_ia32_misc_enable);
@@ -2802,7 +2889,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
* for normal writeback. Limit them to reset or full state updates.
*/
if (level >= KVM_PUT_RESET_STATE) {
- kvm_msr_entry_add(cpu, MSR_IA32_TSC, env->tsc);
kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, env->system_time_msr);
kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF_INT)) {
@@ -3142,9 +3228,6 @@ static int kvm_get_msrs(X86CPU *cpu)
if (has_msr_tsc_aux) {
kvm_msr_entry_add(cpu, MSR_TSC_AUX, 0);
}
- if (has_msr_tsc_adjust) {
- kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, 0);
- }
if (has_msr_tsc_deadline) {
kvm_msr_entry_add(cpu, MSR_IA32_TSCDEADLINE, 0);
}
@@ -3178,10 +3261,6 @@ static int kvm_get_msrs(X86CPU *cpu)
if (has_msr_virt_ssbd) {
kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, 0);
}
- if (!env->tsc_valid) {
- kvm_msr_entry_add(cpu, MSR_IA32_TSC, 0);
- env->tsc_valid = !runstate_is_running();
- }
#ifdef TARGET_X86_64
if (lm_capable_kernel) {
@@ -3385,9 +3464,6 @@ static int kvm_get_msrs(X86CPU *cpu)
case MSR_TSC_AUX:
env->tsc_aux = msrs[i].data;
break;
- case MSR_TSC_ADJUST:
- env->tsc_adjust = msrs[i].data;
- break;
case MSR_IA32_TSCDEADLINE:
env->tsc_deadline = msrs[i].data;
break;
@@ -3995,6 +4071,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
if (ret < 0) {
return ret;
}
+
+ ret = kvm_set_tsc(cpu);
+ if (ret < 0) {
+ return ret;
+ }
}
ret = kvm_put_tscdeadline_msr(x86_cpu);
@@ -4064,6 +4145,12 @@ int kvm_arch_get_registers(CPUState *cs)
if (ret < 0) {
goto out;
}
+
+ ret = kvm_get_tsc(cs);
+ if (ret < 0) {
+ goto out;
+ }
+
ret = 0;
out:
cpu_sync_bndcs_hflags(&cpu->env);
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 233e46bb70..4f4296a3e4 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1359,6 +1359,25 @@ static const VMStateDescription vmstate_msr_tsx_ctrl = {
}
};
+
+static bool tsc_info_needed(void *opaque)
+{
+ X86CPU *cpu = opaque;
+ return cpu->precise_tsc;
+}
+
+static const VMStateDescription vmstate_tsc_info = {
+ .name = "cpu/tsc_nsec_info",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = tsc_info_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(env.tsc_ns_timestamp, X86CPU),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+
VMStateDescription vmstate_x86_cpu = {
.name = "cpu",
.version_id = 12,
@@ -1493,6 +1512,7 @@ VMStateDescription vmstate_x86_cpu = {
#endif
#ifdef CONFIG_KVM
&vmstate_nested_state,
+ &vmstate_tsc_info,
#endif
&vmstate_msr_tsx_ctrl,
NULL
--
2.26.2