* [PATCH v2 1/2] Update the kernel headers for 5.10-rc5 + TSC
2020-12-03 17:15 [PATCH v2 0/2] RFC: Precise TSC migration Maxim Levitsky
@ 2020-12-03 17:15 ` Maxim Levitsky
2020-12-03 17:15 ` [PATCH v2 2/2] Implement support for precise TSC migration Maxim Levitsky
1 sibling, 0 replies; 3+ messages in thread
From: Maxim Levitsky @ 2020-12-03 17:15 UTC (permalink / raw)
To: qemu-devel
Cc: Cornelia Huck, Eduardo Habkost, kvm, Michael S. Tsirkin,
Marcelo Tosatti, Richard Henderson, Maxim Levitsky, Paolo Bonzini
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
include/standard-headers/asm-x86/kvm_para.h | 1 +
linux-headers/asm-x86/kvm.h                 |  2 +
linux-headers/linux/kvm.h                   | 71 ++++++++++++++++++++-
3 files changed, 73 insertions(+), 1 deletion(-)
diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h
index 07877d3295..215d01b4ec 100644
--- a/include/standard-headers/asm-x86/kvm_para.h
+++ b/include/standard-headers/asm-x86/kvm_para.h
@@ -32,6 +32,7 @@
#define KVM_FEATURE_POLL_CONTROL 12
#define KVM_FEATURE_PV_SCHED_YIELD 13
#define KVM_FEATURE_ASYNC_PF_INT 14
+#define KVM_FEATURE_MSI_EXT_DEST_ID 15
#define KVM_HINTS_REALTIME 0
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 89e5f3d1bb..2a60fc6674 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -12,6 +12,7 @@
#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
#define DE_VECTOR 0
#define DB_VECTOR 1
@@ -403,6 +404,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
+#define KVM_X86_QUIRK_TSC_HOST_ACCESS (1 << 5)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 56ce14ad20..9eedc6e835 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -250,6 +250,7 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_ARM_NISV 28
#define KVM_EXIT_X86_RDMSR 29
#define KVM_EXIT_X86_WRMSR 30
+#define KVM_EXIT_DIRTY_RING_FULL 31
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -1053,6 +1054,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_X86_USER_SPACE_MSR 188
#define KVM_CAP_X86_MSR_FILTER 189
#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
+#define KVM_CAP_SYS_HYPERV_CPUID 191
+#define KVM_CAP_DIRTY_LOG_RING 192
+#define KVM_CAP_PRECISE_TSC 193
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1166,6 +1170,16 @@ struct kvm_clock_data {
__u32 pad[9];
};
+
+#define KVM_TSC_STATE_TIMESTAMP_VALID 1
+#define KVM_TSC_STATE_TSC_ADJUST_VALID 2
+struct kvm_tsc_state {
+ __u32 flags;
+ __u64 nsec;
+ __u64 tsc;
+ __u64 tsc_adjust;
+};
+
/* For KVM_CAP_SW_TLB */
#define KVM_MMU_FSL_BOOKE_NOHV 0
@@ -1511,7 +1525,7 @@ struct kvm_enc_region {
/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */
#define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log)
-/* Available with KVM_CAP_HYPERV_CPUID */
+/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */
#define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2)
/* Available with KVM_CAP_ARM_SVE */
@@ -1557,6 +1571,13 @@ struct kvm_pv_cmd {
/* Available with KVM_CAP_X86_MSR_FILTER */
#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter)
+/* Available with KVM_CAP_DIRTY_LOG_RING */
+#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7)
+
+/* Available with KVM_CAP_PRECISE_TSC*/
+#define KVM_SET_TSC_STATE _IOW(KVMIO, 0xc8, struct kvm_tsc_state)
+#define KVM_GET_TSC_STATE _IOR(KVMIO, 0xc9, struct kvm_tsc_state)
+
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
@@ -1710,4 +1731,52 @@ struct kvm_hyperv_eventfd {
#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
+/*
+ * Arch needs to define the macro after implementing the dirty ring
+ * feature. KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the
+ * starting page offset of the dirty ring structures.
+ */
+#ifndef KVM_DIRTY_LOG_PAGE_OFFSET
+#define KVM_DIRTY_LOG_PAGE_OFFSET 0
+#endif
+
+/*
+ * KVM dirty GFN flags, defined as:
+ *
+ * |---------------+---------------+--------------|
+ * | bit 1 (reset) | bit 0 (dirty) | Status |
+ * |---------------+---------------+--------------|
+ * | 0 | 0 | Invalid GFN |
+ * | 0 | 1 | Dirty GFN |
+ * | 1 | X | GFN to reset |
+ * |---------------+---------------+--------------|
+ *
+ * Lifecycle of a dirty GFN goes like:
+ *
+ * dirtied harvested reset
+ * 00 -----------> 01 -------------> 1X -------+
+ * ^ |
+ * | |
+ * +------------------------------------------+
+ *
+ * The userspace program is only responsible for the 01->1X state
+ * conversion after harvesting an entry. Also, it must not skip any
+ * dirty bits, so that dirty bits are always harvested in sequence.
+ */
+#define KVM_DIRTY_GFN_F_DIRTY BIT(0)
+#define KVM_DIRTY_GFN_F_RESET BIT(1)
+#define KVM_DIRTY_GFN_F_MASK 0x3
+
+/*
+ * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of
+ * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn. The
+ * size of the gfn buffer is decided by the first argument when
+ * enabling KVM_CAP_DIRTY_LOG_RING.
+ */
+struct kvm_dirty_gfn {
+ __u32 flags;
+ __u32 slot;
+ __u64 offset;
+};
+
#endif /* __LINUX_KVM_H */
--
2.26.2
^ permalink raw reply related [flat|nested] 3+ messages in thread

* [PATCH v2 2/2] Implement support for precise TSC migration
2020-12-03 17:15 [PATCH v2 0/2] RFC: Precise TSC migration Maxim Levitsky
2020-12-03 17:15 ` [PATCH v2 1/2] Update the kernel headers for 5.10-rc5 + TSC Maxim Levitsky
@ 2020-12-03 17:15 ` Maxim Levitsky
1 sibling, 0 replies; 3+ messages in thread
From: Maxim Levitsky @ 2020-12-03 17:15 UTC (permalink / raw)
To: qemu-devel
Cc: Cornelia Huck, Eduardo Habkost, kvm, Michael S. Tsirkin,
Marcelo Tosatti, Richard Henderson, Maxim Levitsky, Paolo Bonzini
To enable it, you need to set -accel kvm,x-precise-tsc=on,
and have a kernel that supports this feature.
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
---
accel/kvm/kvm-all.c | 28 +++++++++
include/sysemu/kvm.h | 1 +
target/i386/cpu.h | 1 +
target/i386/kvm.c | 140 +++++++++++++++++++++++++++++++++---------
target/i386/machine.c | 19 ++++++
5 files changed, 161 insertions(+), 28 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index baaa54249d..3829f2e7a3 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -104,6 +104,8 @@ struct KVMState
OnOffAuto kernel_irqchip_split;
bool sync_mmu;
uint64_t manual_dirty_log_protect;
+ /* Use KVM_GET_TSC_PRECISE/KVM_SET_TSC_PRECISE to access IA32_TSC */
+ bool precise_tsc;
/* The man page (and posix) say ioctl numbers are signed int, but
* they're not. Linux, glibc and *BSD all treat ioctl numbers as
* unsigned, and treating them as signed here can break things */
@@ -3194,6 +3196,24 @@ bool kvm_kernel_irqchip_split(void)
return kvm_state->kernel_irqchip_split == ON_OFF_AUTO_ON;
}
+bool kvm_has_precise_tsc(void)
+{
+ return kvm_state && kvm_state->precise_tsc;
+}
+
+static void kvm_set_precise_tsc(Object *obj,
+ bool value, Error **errp G_GNUC_UNUSED)
+{
+ KVMState *s = KVM_STATE(obj);
+ s->precise_tsc = value;
+}
+
+static bool kvm_get_precise_tsc(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+ KVMState *s = KVM_STATE(obj);
+ return s->precise_tsc;
+}
+
static void kvm_accel_instance_init(Object *obj)
{
KVMState *s = KVM_STATE(obj);
@@ -3222,6 +3242,14 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data)
NULL, NULL);
object_class_property_set_description(oc, "kvm-shadow-mem",
"KVM shadow MMU size");
+
+ object_class_property_add_bool(oc, "x-precise-tsc",
+ kvm_get_precise_tsc,
+ kvm_set_precise_tsc);
+
+ object_class_property_set_description(oc, "x-precise-tsc",
+ "Use precise tsc kvm API");
+
}
static const TypeInfo kvm_accel_type = {
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index bb5d5cf497..14eff2b1c9 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -519,6 +519,7 @@ void kvm_init_irq_routing(KVMState *s);
bool kvm_kernel_irqchip_allowed(void);
bool kvm_kernel_irqchip_required(void);
bool kvm_kernel_irqchip_split(void);
+bool kvm_has_precise_tsc(void);
/**
* kvm_arch_irqchip_create:
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 88e8586f8f..d2230d9735 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1460,6 +1460,7 @@ typedef struct CPUX86State {
uint64_t tsc_adjust;
uint64_t tsc_deadline;
uint64_t tsc_aux;
+ uint64_t tsc_ns_timestamp;
uint64_t xcr0;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index a2934dda02..4adb7d6246 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -121,7 +121,6 @@ static int has_xsave;
static int has_xcrs;
static int has_pit_state2;
static int has_exception_payload;
-
static bool has_msr_mcg_ext_ctl;
static struct kvm_cpuid2 *cpuid_cache;
@@ -196,31 +195,112 @@ static int kvm_get_tsc(CPUState *cs)
{
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
- struct {
- struct kvm_msrs info;
- struct kvm_msr_entry entries[1];
- } msr_data = {};
int ret;
if (env->tsc_valid) {
return 0;
}
- memset(&msr_data, 0, sizeof(msr_data));
- msr_data.info.nmsrs = 1;
- msr_data.entries[0].index = MSR_IA32_TSC;
- env->tsc_valid = !runstate_is_running();
+ if (kvm_has_precise_tsc()) {
+ struct kvm_tsc_state tsc_state;
- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
- if (ret < 0) {
- return ret;
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_TSC_STATE, &tsc_state);
+ if (ret < 0) {
+ return ret;
+ }
+
+ env->tsc = tsc_state.tsc;
+
+ if (tsc_state.flags & KVM_TSC_STATE_TIMESTAMP_VALID) {
+ env->tsc_ns_timestamp = tsc_state.nsec;
+ }
+
+ if (tsc_state.flags & KVM_TSC_STATE_TSC_ADJUST_VALID) {
+ env->tsc_adjust = tsc_state.tsc_adjust;
+ }
+
+ } else {
+ struct {
+ struct kvm_msrs info;
+ struct kvm_msr_entry entries[2];
+ } msr_data = {
+ .info.nmsrs = 1,
+ .entries[0].index = MSR_IA32_TSC,
+ };
+
+ if (has_msr_tsc_adjust) {
+ msr_data.info.nmsrs++;
+ msr_data.entries[1].index = MSR_TSC_ADJUST;
+ }
+
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
+ if (ret < 0) {
+ return ret;
+ }
+
+ assert(ret == msr_data.info.nmsrs);
+ env->tsc = msr_data.entries[0].data;
+ if (has_msr_tsc_adjust) {
+ env->tsc_adjust = msr_data.entries[1].data;
+ }
}
- assert(ret == 1);
- env->tsc = msr_data.entries[0].data;
+ env->tsc_valid = !runstate_is_running();
return 0;
}
+static int kvm_set_tsc(CPUState *cs)
+{
+ int ret;
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+
+ if (kvm_has_precise_tsc()) {
+ struct kvm_tsc_state tsc_state = {
+ .tsc = env->tsc,
+ };
+
+ if (env->tsc_ns_timestamp) {
+ tsc_state.nsec = env->tsc_ns_timestamp;
+ tsc_state.flags |= KVM_TSC_STATE_TIMESTAMP_VALID;
+ }
+
+ if (has_msr_tsc_adjust) {
+ tsc_state.tsc_adjust = env->tsc_adjust;
+ tsc_state.flags |= KVM_TSC_STATE_TSC_ADJUST_VALID;
+ }
+
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_TSC_STATE, &tsc_state);
+ if (ret < 0) {
+ return ret;
+ }
+
+ } else {
+ struct {
+ struct kvm_msrs info;
+ struct kvm_msr_entry entries[2];
+ } msr_data = {
+ .info.nmsrs = 1,
+ .entries[0].index = MSR_IA32_TSC,
+ .entries[0].data = env->tsc,
+ };
+
+ if (has_msr_tsc_adjust) {
+ msr_data.info.nmsrs++;
+ msr_data.entries[1].index = MSR_TSC_ADJUST;
+ msr_data.entries[1].data = env->tsc_adjust;
+ }
+
+ ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
+ if (ret < 0) {
+ return ret;
+ }
+
+ assert(ret == msr_data.info.nmsrs);
+ }
+ return ret;
+}
+
static inline void do_kvm_synchronize_tsc(CPUState *cpu, run_on_cpu_data arg)
{
kvm_get_tsc(cpu);
@@ -1780,6 +1860,13 @@ int kvm_arch_init_vcpu(CPUState *cs)
}
}
+ if (kvm_has_precise_tsc()) {
+ if (!kvm_check_extension(cs->kvm_state, KVM_CAP_PRECISE_TSC)) {
+ error_report("kvm: Precise TSC is not supported by the host's KVM");
+ return -ENOTSUP;
+ }
+ }
+
if (cpu->vmware_cpuid_freq
/* Guests depend on 0x40000000 to detect this feature, so only expose
* it if KVM exposes leaf 0x40000000. (Conflicts with Hyper-V) */
@@ -2756,9 +2843,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
if (has_msr_tsc_aux) {
kvm_msr_entry_add(cpu, MSR_TSC_AUX, env->tsc_aux);
}
- if (has_msr_tsc_adjust) {
- kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, env->tsc_adjust);
- }
if (has_msr_misc_enable) {
kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE,
env->msr_ia32_misc_enable);
@@ -2802,7 +2886,6 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
* for normal writeback. Limit them to reset or full state updates.
*/
if (level >= KVM_PUT_RESET_STATE) {
- kvm_msr_entry_add(cpu, MSR_IA32_TSC, env->tsc);
kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, env->system_time_msr);
kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF_INT)) {
@@ -3142,9 +3225,6 @@ static int kvm_get_msrs(X86CPU *cpu)
if (has_msr_tsc_aux) {
kvm_msr_entry_add(cpu, MSR_TSC_AUX, 0);
}
- if (has_msr_tsc_adjust) {
- kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, 0);
- }
if (has_msr_tsc_deadline) {
kvm_msr_entry_add(cpu, MSR_IA32_TSCDEADLINE, 0);
}
@@ -3178,10 +3258,6 @@ static int kvm_get_msrs(X86CPU *cpu)
if (has_msr_virt_ssbd) {
kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, 0);
}
- if (!env->tsc_valid) {
- kvm_msr_entry_add(cpu, MSR_IA32_TSC, 0);
- env->tsc_valid = !runstate_is_running();
- }
#ifdef TARGET_X86_64
if (lm_capable_kernel) {
@@ -3385,9 +3461,6 @@ static int kvm_get_msrs(X86CPU *cpu)
case MSR_TSC_AUX:
env->tsc_aux = msrs[i].data;
break;
- case MSR_TSC_ADJUST:
- env->tsc_adjust = msrs[i].data;
- break;
case MSR_IA32_TSCDEADLINE:
env->tsc_deadline = msrs[i].data;
break;
@@ -3995,6 +4068,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
if (ret < 0) {
return ret;
}
+
+ ret = kvm_set_tsc(cpu);
+ if (ret < 0) {
+ return ret;
+ }
}
ret = kvm_put_tscdeadline_msr(x86_cpu);
@@ -4064,6 +4142,12 @@ int kvm_arch_get_registers(CPUState *cs)
if (ret < 0) {
goto out;
}
+
+ ret = kvm_get_tsc(cs);
+ if (ret < 0) {
+ goto out;
+ }
+
ret = 0;
out:
cpu_sync_bndcs_hflags(&cpu->env);
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 233e46bb70..59b1c9be2b 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1359,6 +1359,24 @@ static const VMStateDescription vmstate_msr_tsx_ctrl = {
}
};
+
+static bool tsc_ns_timestamp_needed(void *opaque)
+{
+ return kvm_has_precise_tsc();
+}
+
+static const VMStateDescription vmstate_tsc_ns_timestamp = {
+ .name = "cpu/tsc_ns_timestamp",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = tsc_ns_timestamp_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(env.tsc_ns_timestamp, X86CPU),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+
VMStateDescription vmstate_x86_cpu = {
.name = "cpu",
.version_id = 12,
@@ -1493,6 +1511,7 @@ VMStateDescription vmstate_x86_cpu = {
#endif
#ifdef CONFIG_KVM
&vmstate_nested_state,
+ &vmstate_tsc_ns_timestamp,
#endif
&vmstate_msr_tsx_ctrl,
NULL
--
2.26.2
^ permalink raw reply related [flat|nested] 3+ messages in thread