From: Yunhong Jiang <yunhong.jiang@linux.intel.com>
To: kvm@vger.kernel.org
Cc: mtosatti@redhat.com, rkrcmar@redhat.com, pbonzini@redhat.com,
kernellwp@gmail.com
Subject: [RFC PATCH V3 3/5] Utilize the vmx preemption timer
Date: Fri, 3 Jun 2016 17:42:29 -0700 [thread overview]
Message-ID: <1465000951-13343-4-git-send-email-yunhong.jiang@linux.intel.com> (raw)
In-Reply-To: <1465000951-13343-1-git-send-email-yunhong.jiang@linux.intel.com>
From: Yunhong Jiang <yunhong.jiang@gmail.com>
Adding the basic VMX preemption timer functionality, including checking
if the feature is supported, if the feature is broken on the CPU,
setup/clean the VMX preemption timer.
Also adds a parameter to state if the VMX preemption timer should be
utilized.
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
---
arch/x86/include/asm/kvm_host.h | 8 +++
arch/x86/kvm/vmx.c | 120 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 127 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e0fbe7e70dc1..2116b7b0b6c0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -655,6 +655,11 @@ struct kvm_vcpu_arch {
int pending_ioapic_eoi;
int pending_external_vector;
+
+ /* the host tsc value when set hv_deadline_tsc */
+ u64 hv_orig_tsc;
+ /* apic deadline value in host tsc */
+ u64 hv_deadline_tsc;
};
struct kvm_lpage_info {
@@ -1005,6 +1010,9 @@ struct kvm_x86_ops {
int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
+
+ int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+ void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
};
struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 51b08cd43bb7..3e407c6be171 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -110,6 +110,10 @@ module_param_named(pml, enable_pml, bool, S_IRUGO);
#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL
+static int cpu_preemption_timer_multi;
+static bool __read_mostly enable_hv_timer;
+module_param_named(enable_hv_timer, enable_hv_timer, bool, S_IRUGO);
+
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON \
@@ -1056,6 +1060,61 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
}
+/*
+ * Comment's format: document - errata name - stepping - processor name.
+ * Refer from
+ * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
+ */
+static u32 vmx_preemption_cpu_tfms[] = {
+/* 323344.pdf - BA86 - D0 - Xeon 7500 Series */
+0x000206E6,
+/* 323056.pdf - AAX65 - C2 - Xeon L3406 */
+/* 322814.pdf - AAT59 - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
+/* 322911.pdf - AAU65 - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
+0x00020652,
+/* 322911.pdf - AAU65 - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
+0x00020655,
+/* 322373.pdf - AAO95 - B1 - Xeon 3400 Series */
+/* 322166.pdf - AAN92 - B1 - i7-800 and i5-700 Desktop */
+/*
+ * 320767.pdf - AAP86 - B1 -
+ * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
+ */
+0x000106E5,
+/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
+0x000106A0,
+/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
+0x000106A1,
+/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
+0x000106A4,
+ /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
+ /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
+ /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
+0x000106A5,
+};
+
+static inline bool cpu_has_broken_vmx_preemption_timer(void)
+{
+ u32 eax = cpuid_eax(0x00000001), i;
+
+ /* Clear the reserved bits */
+ eax &= ~(0x3U << 14 | 0xfU << 28);
+ for (i = 0; i < sizeof(vmx_preemption_cpu_tfms)/sizeof(u32); i++)
+ if (eax == vmx_preemption_cpu_tfms[i])
+ return true;
+
+ return false;
+}
+
+static inline bool cpu_has_vmx_preemption_timer(void)
+{
+ if (cpu_has_broken_vmx_preemption_timer())
+ return false;
+
+ return vmcs_config.pin_based_exec_ctrl &
+ PIN_BASED_VMX_PREEMPTION_TIMER;
+}
+
static inline bool cpu_has_vmx_posted_intr(void)
{
return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
@@ -3308,7 +3367,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
return -EIO;
min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
- opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+ PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
@@ -4781,6 +4841,8 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
if (!kvm_vcpu_apicv_active(&vmx->vcpu))
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+ /* Enable the preemption timer dynamically */
+ pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
return pin_based_exec_ctrl;
}
@@ -6389,6 +6451,24 @@ static __init int hardware_setup(void)
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
}
+ /*
+ * We support only x86_64 platform now because guest_tsc and host_tsc
+ * conversion is only done there yet.
+ */
+#ifdef CONFIG_X86_64
+ if (cpu_has_vmx_preemption_timer() && enable_hv_timer) {
+ u64 vmx_msr;
+
+ rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
+ cpu_preemption_timer_multi =
+ vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+ } else
+#endif
+ {
+ kvm_x86_ops->set_hv_timer = NULL;
+ kvm_x86_ops->cancel_hv_timer = NULL;
+ }
+
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
return alloc_kvm_area();
@@ -10662,6 +10742,41 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
return X86EMUL_CONTINUE;
}
+static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
+{
+ u64 tscl = rdtsc(), delta_tsc;
+
+ delta_tsc = guest_deadline_tsc - kvm_read_l1_tsc(vcpu, tscl);
+
+ /* Convert to host delta tsc if tsc scaling is enabled */
+ if (vcpu->arch.tsc_scaling_ratio &&
+ u64_shl_div_u64(delta_tsc,
+ kvm_tsc_scaling_ratio_frac_bits,
+ vcpu->arch.tsc_scaling_ratio,
+ &delta_tsc))
+ return -1;
+ /*
+ * If the delta tsc can't be fit in the 32 bit after the multi shift,
+ * we can't use the preemption timer.
+ * It's possible that it can be fit when vmentry happens late, but
+ * checking on every vmentry is costly, so fail earilier.
+ */
+ if (delta_tsc >> (cpu_preemption_timer_multi + 32))
+ return -1;
+
+ vcpu->arch.hv_orig_tsc = tscl;
+ vcpu->arch.hv_deadline_tsc = tscl + delta_tsc;
+ vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
+ PIN_BASED_VMX_PREEMPTION_TIMER);
+ return 0;
+}
+
+static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
+{
+ vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
+ PIN_BASED_VMX_PREEMPTION_TIMER);
+}
+
static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
if (ple_gap)
@@ -11038,6 +11153,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
.pmu_ops = &intel_pmu_ops,
.update_pi_irte = vmx_update_pi_irte,
+
+ .set_hv_timer = vmx_set_hv_timer,
+ .cancel_hv_timer = vmx_cancel_hv_timer,
};
static int __init vmx_init(void)
--
1.8.3.1
next prev parent reply other threads:[~2016-06-04 0:46 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-06-04 0:42 [RFC PATCH V3 0/5] Utilizing VMX preemption for timer virtualization Yunhong Jiang
2016-06-04 0:42 ` [RFC PATCH V3 1/5] Rename the vmx_pre/post_block to pi_pre/post_block Yunhong Jiang
2016-06-04 0:42 ` [RFC PATCH V3 2/5] Add function for left shift and 64 bit division Yunhong Jiang
2016-06-06 12:37 ` Paolo Bonzini
2016-06-04 0:42 ` Yunhong Jiang [this message]
2016-06-06 12:37 ` [RFC PATCH V3 3/5] Utilize the vmx preemption timer Paolo Bonzini
2016-06-04 0:42 ` [RFC PATCH V3 4/5] Separate the start_sw_tscdeadline Yunhong Jiang
2016-06-04 0:42 ` [RFC PATCH V3 5/5] Utilize the vmx preemption timer for tsc deadline timer Yunhong Jiang
2016-06-06 12:36 ` Paolo Bonzini
2016-06-06 12:45 ` [RFC PATCH V3 0/5] Utilizing VMX preemption for timer virtualization Paolo Bonzini
2016-06-06 18:26 ` yunhong jiang
2016-06-07 16:31 ` David Matlack
2016-06-07 19:30 ` David Matlack
2016-06-08 4:18 ` Wanpeng Li
2016-06-13 20:56 ` yunhong jiang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1465000951-13343-4-git-send-email-yunhong.jiang@linux.intel.com \
--to=yunhong.jiang@linux.intel.com \
--cc=kernellwp@gmail.com \
--cc=kvm@vger.kernel.org \
--cc=mtosatti@redhat.com \
--cc=pbonzini@redhat.com \
--cc=rkrcmar@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox