public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Yunhong Jiang <yunhong.jiang@linux.intel.com>
To: kvm@vger.kernel.org
Cc: mtosatti@redhat.com, rkrcmar@redhat.com, pbonzini@redhat.com,
	kernellwp@gmail.com
Subject: [RFC PATCH V3 3/5] Utilize the vmx preemption timer
Date: Fri,  3 Jun 2016 17:42:29 -0700	[thread overview]
Message-ID: <1465000951-13343-4-git-send-email-yunhong.jiang@linux.intel.com> (raw)
In-Reply-To: <1465000951-13343-1-git-send-email-yunhong.jiang@linux.intel.com>

From: Yunhong Jiang <yunhong.jiang@gmail.com>

Adding the basic VMX preemption timer functionality, including checking
if the feature is supported, if the feature is broken on the CPU,
setup/clean the VMX preemption timer.

Also adds a parameter to state if the VMX preemption timer should be
utilized.

Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
---
 arch/x86/include/asm/kvm_host.h |   8 +++
 arch/x86/kvm/vmx.c              | 120 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e0fbe7e70dc1..2116b7b0b6c0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -655,6 +655,11 @@ struct kvm_vcpu_arch {
 
 	int pending_ioapic_eoi;
 	int pending_external_vector;
+
+	/* the host tsc value when set hv_deadline_tsc */
+	u64 hv_orig_tsc;
+	/* apic deadline value in host tsc */
+	u64 hv_deadline_tsc;
 };
 
 struct kvm_lpage_info {
@@ -1005,6 +1010,9 @@ struct kvm_x86_ops {
 	int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
 			      uint32_t guest_irq, bool set);
 	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
+
+	int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+	void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 51b08cd43bb7..3e407c6be171 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -110,6 +110,10 @@ module_param_named(pml, enable_pml, bool, S_IRUGO);
 
 #define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL
 
+static int cpu_preemption_timer_multi;
+static bool __read_mostly enable_hv_timer;
+module_param_named(enable_hv_timer, enable_hv_timer, bool, S_IRUGO);
+
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON						\
@@ -1056,6 +1060,61 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
 		SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 }
 
+/*
+ * Comment's format: document - errata name - stepping - processor name.
+ * Refer from
+ * https://www.virtualbox.org/svn/vbox/trunk/src/VBox/VMM/VMMR0/HMR0.cpp
+ */
+static u32 vmx_preemption_cpu_tfms[] = {
+/* 323344.pdf - BA86   - D0 - Xeon 7500 Series */
+0x000206E6,
+/* 323056.pdf - AAX65  - C2 - Xeon L3406 */
+/* 322814.pdf - AAT59  - C2 - i7-600, i5-500, i5-400 and i3-300 Mobile */
+/* 322911.pdf - AAU65  - C2 - i5-600, i3-500 Desktop and Pentium G6950 */
+0x00020652,
+/* 322911.pdf - AAU65  - K0 - i5-600, i3-500 Desktop and Pentium G6950 */
+0x00020655,
+/* 322373.pdf - AAO95  - B1 - Xeon 3400 Series */
+/* 322166.pdf - AAN92  - B1 - i7-800 and i5-700 Desktop */
+/*
+ * 320767.pdf - AAP86  - B1 -
+ * i7-900 Mobile Extreme, i7-800 and i7-700 Mobile
+ */
+0x000106E5,
+/* 321333.pdf - AAM126 - C0 - Xeon 3500 */
+0x000106A0,
+/* 321333.pdf - AAM126 - C1 - Xeon 3500 */
+0x000106A1,
+/* 320836.pdf - AAJ124 - C0 - i7-900 Desktop Extreme and i7-900 Desktop */
+0x000106A4,
+ /* 321333.pdf - AAM126 - D0 - Xeon 3500 */
+ /* 321324.pdf - AAK139 - D0 - Xeon 5500 */
+ /* 320836.pdf - AAJ124 - D0 - i7-900 Extreme and i7-900 Desktop */
+0x000106A5,
+};
+
+static inline bool cpu_has_broken_vmx_preemption_timer(void)
+{
+	u32 eax = cpuid_eax(0x00000001), i;
+
+	/* Clear the reserved bits */
+	eax &= ~(0x3U << 14 | 0xfU << 28);
+	for (i = 0; i < sizeof(vmx_preemption_cpu_tfms)/sizeof(u32); i++)
+		if (eax == vmx_preemption_cpu_tfms[i])
+			return true;
+
+	return false;
+}
+
+static inline bool cpu_has_vmx_preemption_timer(void)
+{
+	if (cpu_has_broken_vmx_preemption_timer())
+		return false;
+
+	return vmcs_config.pin_based_exec_ctrl &
+		PIN_BASED_VMX_PREEMPTION_TIMER;
+}
+
 static inline bool cpu_has_vmx_posted_intr(void)
 {
 	return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
@@ -3308,7 +3367,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		return -EIO;
 
 	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
-	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR;
+	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+		 PIN_BASED_VMX_PREEMPTION_TIMER;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
 				&_pin_based_exec_control) < 0)
 		return -EIO;
@@ -4781,6 +4841,8 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
 
 	if (!kvm_vcpu_apicv_active(&vmx->vcpu))
 		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+	/* Enable the preemption timer dynamically */
+	pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
 	return pin_based_exec_ctrl;
 }
 
@@ -6389,6 +6451,24 @@ static __init int hardware_setup(void)
 		kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
 	}
 
+	/*
+	 * We support only x86_64 platform now because guest_tsc and host_tsc
+	 * conversion is only done there yet.
+	 */
+#ifdef CONFIG_X86_64
+	if (cpu_has_vmx_preemption_timer() && enable_hv_timer) {
+		u64 vmx_msr;
+
+		rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
+		cpu_preemption_timer_multi =
+			 vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+	} else
+#endif
+	{
+		kvm_x86_ops->set_hv_timer = NULL;
+		kvm_x86_ops->cancel_hv_timer = NULL;
+	}
+
 	kvm_set_posted_intr_wakeup_handler(wakeup_handler);
 
 	return alloc_kvm_area();
@@ -10662,6 +10742,41 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 	return X86EMUL_CONTINUE;
 }
 
+static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
+{
+	u64 tscl = rdtsc(), delta_tsc;
+
+	delta_tsc = guest_deadline_tsc - kvm_read_l1_tsc(vcpu, tscl);
+
+	/* Convert to host delta tsc if tsc scaling is enabled */
+	if (vcpu->arch.tsc_scaling_ratio &&
+			u64_shl_div_u64(delta_tsc,
+				kvm_tsc_scaling_ratio_frac_bits,
+				vcpu->arch.tsc_scaling_ratio,
+				&delta_tsc))
+		return -1;
+	/*
+	 * If the delta tsc can't be fit in the 32 bit after the multi shift,
+	 * we can't use the preemption timer.
+	 * It's possible that it can be fit when vmentry happens late, but
+	 * checking on every vmentry is costly, so fail earilier.
+	 */
+	if (delta_tsc >> (cpu_preemption_timer_multi + 32))
+		return -1;
+
+	vcpu->arch.hv_orig_tsc = tscl;
+	vcpu->arch.hv_deadline_tsc = tscl + delta_tsc;
+	vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
+			PIN_BASED_VMX_PREEMPTION_TIMER);
+	return 0;
+}
+
+static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
+{
+	vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
+			PIN_BASED_VMX_PREEMPTION_TIMER);
+}
+
 static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
 	if (ple_gap)
@@ -11038,6 +11153,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.pmu_ops = &intel_pmu_ops,
 
 	.update_pi_irte = vmx_update_pi_irte,
+
+	.set_hv_timer = vmx_set_hv_timer,
+	.cancel_hv_timer = vmx_cancel_hv_timer,
 };
 
 static int __init vmx_init(void)
-- 
1.8.3.1


  parent reply	other threads:[~2016-06-04  0:46 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-04  0:42 [RFC PATCH V3 0/5] Utilizing VMX preemption for timer virtualization Yunhong Jiang
2016-06-04  0:42 ` [RFC PATCH V3 1/5] Rename the vmx_pre/post_block to pi_pre/post_block Yunhong Jiang
2016-06-04  0:42 ` [RFC PATCH V3 2/5] Add function for left shift and 64 bit division Yunhong Jiang
2016-06-06 12:37   ` Paolo Bonzini
2016-06-04  0:42 ` Yunhong Jiang [this message]
2016-06-06 12:37   ` [RFC PATCH V3 3/5] Utilize the vmx preemption timer Paolo Bonzini
2016-06-04  0:42 ` [RFC PATCH V3 4/5] Separate the start_sw_tscdeadline Yunhong Jiang
2016-06-04  0:42 ` [RFC PATCH V3 5/5] Utilize the vmx preemption timer for tsc deadline timer Yunhong Jiang
2016-06-06 12:36   ` Paolo Bonzini
2016-06-06 12:45 ` [RFC PATCH V3 0/5] Utilizing VMX preemption for timer virtualization Paolo Bonzini
2016-06-06 18:26   ` yunhong jiang
2016-06-07 16:31   ` David Matlack
2016-06-07 19:30   ` David Matlack
2016-06-08  4:18 ` Wanpeng Li
2016-06-13 20:56   ` yunhong jiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1465000951-13343-4-git-send-email-yunhong.jiang@linux.intel.com \
    --to=yunhong.jiang@linux.intel.com \
    --cc=kernellwp@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=rkrcmar@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox