* [PATCH v1 0/4] Enable LBR for the guest
@ 2017-09-25  4:44 Wei Wang
  2017-09-25  4:44 ` [PATCH v1 1/4] KVM/vmx: re-write the msr auto switch feature Wei Wang
                   ` (4 more replies)
  0 siblings, 5 replies; 18+ messages in thread
From: Wei Wang @ 2017-09-25  4:44 UTC (permalink / raw)
  To: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, ak,
	mingo
  Cc: Wei Wang

This patch series enables the Last Branch Recording feature for the
guest. Instead of trapping each LBR stack MSR access, the MSRs are
passed through to the guest. Those MSRs are switched (i.e. saved and
loaded) on VMExit and VMEntry.

Test:
Try "perf record -b ./test_program" on guest.

Wei Wang (4):
  KVM/vmx: re-write the msr auto switch feature
  KVM/vmx: auto switch MSR_IA32_DEBUGCTLMSR
  perf/x86: add a function to get the lbr stack
  KVM/vmx: enable lbr for the guest

 arch/x86/events/intel/lbr.c       |  23 +++++++
 arch/x86/include/asm/perf_event.h |  14 ++++
 arch/x86/kvm/vmx.c                | 135 +++++++++++++++++++++++++++++++++-----
 3 files changed, 154 insertions(+), 18 deletions(-)

-- 
2.7.4

* [PATCH v1 1/4] KVM/vmx: re-write the msr auto switch feature
  2017-09-25  4:44 [PATCH v1 0/4] Enable LBR for the guest Wei Wang
@ 2017-09-25  4:44 ` Wei Wang
  2017-09-25 11:54   ` Paolo Bonzini
  2017-09-25  4:44 ` [PATCH v1 2/4] KVM/vmx: auto switch MSR_IA32_DEBUGCTLMSR Wei Wang
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 18+ messages in thread
From: Wei Wang @ 2017-09-25  4:44 UTC (permalink / raw)
  To: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, ak,
	mingo
  Cc: Wei Wang

This patch clarifies a vague statement in the SDM: the recommended maximum
number of MSRs that can be automatically switched by the CPU during VMExit
and VMEntry is 512, rather than 512 bytes of MSRs.

Depending on the CPU implementation, more than 512 MSRs may be supported
for auto switching. The supported number can be calculated as
(MSR_IA32_VMX_MISC[27:25] + 1) * 512.
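
For example, if bits 27:25 read as 0 the recommended maximum is 512
entries, and a value of 1 raises it to 1024. A minimal decoding helper
(illustrative only, not part of this patch) could look like:

static unsigned int vmx_misc_max_autoload_count(u64 vmx_misc)
{
	/* Bits 27:25 hold n; the recommended maximum is (n + 1) * 512. */
	return (((vmx_misc >> 25) & 0x7) + 1) * 512;
}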

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
---
 arch/x86/kvm/vmx.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 63 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0726ca7..8434fc8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -158,6 +158,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 #define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
 #define KVM_VMX_DEFAULT_PLE_WINDOW_MAX    \
 		INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
+#define KVM_VMX_DEFAULT_MSR_AUTO_LOAD_COUNT 512
 
 static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
 module_param(ple_gap, int, S_IRUGO);
@@ -178,9 +179,10 @@ static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
 static int ple_window_max        = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
 module_param(ple_window_max, int, S_IRUGO);
 
+static int msr_autoload_count_max = KVM_VMX_DEFAULT_MSR_AUTO_LOAD_COUNT;
+
 extern const ulong vmx_return;
 
-#define NR_AUTOLOAD_MSRS 8
 #define VMCS02_POOL_SIZE 1
 
 struct vmcs {
@@ -588,8 +590,8 @@ struct vcpu_vmx {
 	bool                  __launched; /* temporary, used in vmx_vcpu_run */
 	struct msr_autoload {
 		unsigned nr;
-		struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS];
-		struct vmx_msr_entry host[NR_AUTOLOAD_MSRS];
+		struct vmx_msr_entry *guest;
+		struct vmx_msr_entry *host;
 	} msr_autoload;
 	struct {
 		int           loaded;
@@ -1942,6 +1944,7 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 	m->host[i] = m->host[m->nr];
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
+	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
 }
 
 static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
@@ -1997,7 +2000,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 		if (m->guest[i].index == msr)
 			break;
 
-	if (i == NR_AUTOLOAD_MSRS) {
+	if (i == msr_autoload_count_max) {
 		printk_once(KERN_WARNING "Not enough msr switch entries. "
 				"Can't add msr %x\n", msr);
 		return;
@@ -2005,6 +2008,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 		++m->nr;
 		vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr);
 		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
+		vmcs_write32(VM_EXIT_MSR_STORE_COUNT, m->nr);
 	}
 
 	m->guest[i].index = msr;
@@ -5501,6 +5505,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autoload.guest));
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
 
@@ -6670,6 +6675,21 @@ static void update_ple_window_actual_max(void)
 			                    ple_window_grow, INT_MIN);
 }
 
+static void update_msr_autoload_count_max(void)
+{
+	u64 vmx_msr;
+	int n;
+
+	/*
+	 * According to the Intel SDM, if Bits 27:25 of MSR_IA32_VMX_MISC is
+	 * n, then (n + 1) * 512 is the recommended max number of MSRs to be
+	 * included in the VMExit and VMEntry MSR auto switch list.
+	 */
+	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
+	n = ((vmx_msr & 0xe000000) >> 25) + 1;
+	msr_autoload_count_max = n * KVM_VMX_DEFAULT_MSR_AUTO_LOAD_COUNT;
+}
+
 /*
  * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
  */
@@ -6837,6 +6857,7 @@ static __init int hardware_setup(void)
 		kvm_disable_tdp();
 
 	update_ple_window_actual_max();
+	update_msr_autoload_count_max();
 
 	/*
 	 * Only enable PML when hardware supports PML feature, and both EPT
@@ -9248,6 +9269,19 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
 	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
 }
 
+/*
+ * Currently, the CPU does not support the auto save of MSRs on VMEntry, so we
+ * save the MSRs for the host before entering into guest.
+ */
+static void vmx_save_host_msrs(struct msr_autoload *m)
+
+{
+	u32 i;
+
+	for (i = 0; i < m->nr; i++)
+		m->host[i].value = __rdmsr(m->host[i].index);
+}
+
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -9304,6 +9338,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_arm_hv_timer(vcpu);
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
+
+	vmx_save_host_msrs(&vmx->msr_autoload);
 	asm(
 		/* Store host registers */
 		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -9504,6 +9540,7 @@ static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	size_t bytes = msr_autoload_count_max * sizeof(struct vmx_msr_entry);
 
 	if (enable_pml)
 		vmx_destroy_pml_buffer(vmx);
@@ -9512,15 +9549,17 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	vmx_free_vcpu_nested(vcpu);
 	free_loaded_vmcs(vmx->loaded_vmcs);
 	kfree(vmx->guest_msrs);
+	free_pages_exact(vmx->msr_autoload.host, bytes);
+	free_pages_exact(vmx->msr_autoload.guest, bytes);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
 }
 
 static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 {
-	int err;
+	int err, cpu;
+	size_t bytes;
 	struct vcpu_vmx *vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
-	int cpu;
 
 	if (!vmx)
 		return ERR_PTR(-ENOMEM);
@@ -9559,6 +9598,17 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto free_msrs;
 	loaded_vmcs_init(vmx->loaded_vmcs);
 
+	bytes = msr_autoload_count_max * sizeof(struct vmx_msr_entry);
+	vmx->msr_autoload.guest = alloc_pages_exact(bytes,
+						    GFP_KERNEL | __GFP_ZERO);
+	if (!vmx->msr_autoload.guest)
+		goto free_vmcs;
+
+	vmx->msr_autoload.host = alloc_pages_exact(bytes,
+						   GFP_KERNEL | __GFP_ZERO);
+	if (!vmx->msr_autoload.host)
+		goto free_autoload_guest;
+
 	cpu = get_cpu();
 	vmx_vcpu_load(&vmx->vcpu, cpu);
 	vmx->vcpu.cpu = cpu;
@@ -9566,11 +9616,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx_vcpu_put(&vmx->vcpu);
 	put_cpu();
 	if (err)
-		goto free_vmcs;
+		goto free_autoload_host;
 	if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
 		err = alloc_apic_access_page(kvm);
 		if (err)
-			goto free_vmcs;
+			goto free_autoload_host;
 	}
 
 	if (enable_ept) {
@@ -9579,7 +9629,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 				VMX_EPT_IDENTITY_PAGETABLE_ADDR;
 		err = init_rmode_identity_map(kvm);
 		if (err)
-			goto free_vmcs;
+			goto free_autoload_host;
 	}
 
 	if (nested) {
@@ -9594,6 +9644,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	return &vmx->vcpu;
 
+free_autoload_host:
+	free_pages_exact(vmx->msr_autoload.host, bytes);
+free_autoload_guest:
+	free_pages_exact(vmx->msr_autoload.guest, bytes);
 free_vmcs:
 	free_vpid(vmx->nested.vpid02);
 	free_loaded_vmcs(vmx->loaded_vmcs);
-- 
2.7.4

* [PATCH v1 2/4] KVM/vmx: auto switch MSR_IA32_DEBUGCTLMSR
  2017-09-25  4:44 [PATCH v1 0/4] Enable LBR for the guest Wei Wang
  2017-09-25  4:44 ` [PATCH v1 1/4] KVM/vmx: re-write the msr auto switch feature Wei Wang
@ 2017-09-25  4:44 ` Wei Wang
  2017-09-25 11:57   ` Paolo Bonzini
  2017-09-25  4:44 ` [PATCH v1 3/4] perf/x86: add a function to get the lbr stack Wei Wang
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 18+ messages in thread
From: Wei Wang @ 2017-09-25  4:44 UTC (permalink / raw)
  To: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, ak,
	mingo
  Cc: Wei Wang

Pass through MSR_IA32_DEBUGCTLMSR to the guest, and take advantage of the
VT-x hardware feature to auto switch the MSR upon VMExit and VMEntry.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
---
 arch/x86/kvm/vmx.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8434fc8..5f5c2f1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5502,13 +5502,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	if (cpu_has_vmx_vmfunc())
 		vmcs_write64(VM_FUNCTION_CONTROL, 0);
 
-	vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
-	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
 	vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autoload.guest));
-	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
 
+	add_atomic_switch_msr(vmx, MSR_IA32_DEBUGCTLMSR, 0, 0);
+
 	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 
@@ -6821,6 +6820,7 @@ static __init int hardware_setup(void)
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_DEBUGCTLMSR, false);
 
 	memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
 			vmx_msr_bitmap_legacy, PAGE_SIZE);
@@ -9285,7 +9285,7 @@ static void vmx_save_host_msrs(struct msr_autoload *m)
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long debugctlmsr, cr3, cr4;
+	unsigned long cr3, cr4;
 
 	/* Don't enter VMX if guest state is invalid, let the exit handler
 	   start emulation until we arrive back to a valid state */
@@ -9333,7 +9333,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		__write_pkru(vcpu->arch.pkru);
 
 	atomic_switch_perf_msrs(vmx);
-	debugctlmsr = get_debugctlmsr();
 
 	vmx_arm_hv_timer(vcpu);
 
@@ -9445,10 +9444,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 	      );
 
-	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
-	if (debugctlmsr)
-		update_debugctlmsr(debugctlmsr);
-
 #ifndef CONFIG_X86_64
 	/*
 	 * The sysexit path does not restore ds/es, so we must set them to
-- 
2.7.4

* [PATCH v1 3/4] perf/x86: add a function to get the lbr stack
  2017-09-25  4:44 [PATCH v1 0/4] Enable LBR for the guest Wei Wang
  2017-09-25  4:44 ` [PATCH v1 1/4] KVM/vmx: re-write the msr auto switch feature Wei Wang
  2017-09-25  4:44 ` [PATCH v1 2/4] KVM/vmx: auto switch MSR_IA32_DEBUGCTLMSR Wei Wang
@ 2017-09-25  4:44 ` Wei Wang
  2017-09-25  4:44 ` [PATCH v1 4/4] KVM/vmx: enable lbr for the guest Wei Wang
  2017-09-25 14:59 ` [PATCH v1 0/4] Enable LBR " Andi Kleen
  4 siblings, 0 replies; 18+ messages in thread
From: Wei Wang @ 2017-09-25  4:44 UTC (permalink / raw)
  To: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, ak,
	mingo
  Cc: Wei Wang

The LBR stack MSRs are microarchitecture specific. The perf subsystem has
already assigned the abstracted MSR values based on the CPU
microarchitecture.

This patch enables a caller outside the perf subsystem to get the LBR
stack info. This is useful for hypervisors to prepare the LBR feature
for the guest.
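
For reference, a minimal sketch of a caller (the function below is purely
illustrative and not part of this patch):

static void dump_lbr_stack_msrs(void)
{
	struct perf_lbr_stack s;
	int i;

	if (perf_get_lbr_stack(&s))
		return;

	pr_info("LBR TOS MSR: %lx, %d entries\n", s.lbr_tos, s.lbr_nr);
	for (i = 0; i < s.lbr_nr; i++)
		pr_info("  FROM %lx TO %lx\n", s.lbr_from + i, s.lbr_to + i);
}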

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
---
 arch/x86/events/intel/lbr.c       | 23 +++++++++++++++++++++++
 arch/x86/include/asm/perf_event.h | 14 ++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8a6bbac..ea547ec 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1230,3 +1230,26 @@ void intel_pmu_lbr_init_knl(void)
 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
 	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
 }
+
+/**
+ * perf_get_lbr_stack - get the lbr stack related MSRs
+ *
+ * @stack: the caller's memory to get the lbr stack
+ *
+ * Returns: 0 indicates that the lbr stack has been successfully obtained.
+ */
+int perf_get_lbr_stack(struct perf_lbr_stack *stack)
+{
+	stack->lbr_nr = x86_pmu.lbr_nr;
+	stack->lbr_tos = x86_pmu.lbr_tos;
+	stack->lbr_from = x86_pmu.lbr_from;
+	stack->lbr_to = x86_pmu.lbr_to;
+
+	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+		stack->lbr_info = MSR_LBR_INFO_0;
+	else
+		stack->lbr_info = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_get_lbr_stack);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f353061..c098462 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -266,7 +266,16 @@ struct perf_guest_switch_msr {
 	u64 host, guest;
 };
 
+struct perf_lbr_stack {
+	int		lbr_nr;
+	unsigned long	lbr_tos;
+	unsigned long	lbr_from;
+	unsigned long	lbr_to;
+	unsigned long	lbr_info;
+};
+
 extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+extern int perf_get_lbr_stack(struct perf_lbr_stack *stack);
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
 extern void perf_check_microcode(void);
 #else
@@ -276,6 +285,11 @@ static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 	return NULL;
 }
 
+static inline int perf_get_lbr_stack(struct perf_lbr_stack *stack)
+{
+	return -1;
+}
+
 static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
 {
 	memset(cap, 0, sizeof(*cap));
-- 
2.7.4

* [PATCH v1 4/4] KVM/vmx: enable lbr for the guest
  2017-09-25  4:44 [PATCH v1 0/4] Enable LBR for the guest Wei Wang
                   ` (2 preceding siblings ...)
  2017-09-25  4:44 ` [PATCH v1 3/4] perf/x86: add a function to get the lbr stack Wei Wang
@ 2017-09-25  4:44 ` Wei Wang
  2017-09-25  9:16   ` Paolo Bonzini
  2017-09-25 14:57   ` Andi Kleen
  2017-09-25 14:59 ` [PATCH v1 0/4] Enable LBR " Andi Kleen
  4 siblings, 2 replies; 18+ messages in thread
From: Wei Wang @ 2017-09-25  4:44 UTC (permalink / raw)
  To: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, ak,
	mingo
  Cc: Wei Wang

Pass through the LBR stack MSRs to the guest, and auto switch them upon
VMEntry and VMExit.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
---
 arch/x86/kvm/vmx.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5f5c2f1..35e02a7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -107,6 +107,9 @@ static u64 __read_mostly host_xss;
 static bool __read_mostly enable_pml = 1;
 module_param_named(pml, enable_pml, bool, S_IRUGO);
 
+static bool __read_mostly enable_lbrv = 1;
+module_param_named(lbrv, enable_lbrv, bool, 0444);
+
 #define KVM_VMX_TSC_MULTIPLIER_MAX     0xffffffffffffffffULL
 
 /* Guest_tsc -> host_tsc conversion requires 64-bit division.  */
@@ -5428,6 +5431,25 @@ static void ept_set_mmio_spte_mask(void)
 				   VMX_EPT_MISCONFIG_WX_VALUE);
 }
 
+static void auto_switch_lbr_msrs(struct vcpu_vmx *vmx)
+{
+	int i;
+	struct perf_lbr_stack lbr_stack;
+
+	perf_get_lbr_stack(&lbr_stack);
+
+	add_atomic_switch_msr(vmx, MSR_LBR_SELECT, 0, 0);
+	add_atomic_switch_msr(vmx, lbr_stack.lbr_tos, 0, 0);
+
+	for (i = 0; i < lbr_stack.lbr_nr; i++) {
+		add_atomic_switch_msr(vmx, lbr_stack.lbr_from + i, 0, 0);
+		add_atomic_switch_msr(vmx, lbr_stack.lbr_to + i, 0, 0);
+		if (lbr_stack.lbr_info)
+			add_atomic_switch_msr(vmx, lbr_stack.lbr_info + i, 0,
+					      0);
+	}
+}
+
 #define VMX_XSS_EXIT_BITMAP 0
 /*
  * Sets up the vmcs for emulated real mode.
@@ -5508,6 +5530,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
 	add_atomic_switch_msr(vmx, MSR_IA32_DEBUGCTLMSR, 0, 0);
 
+	if (enable_lbrv)
+		auto_switch_lbr_msrs(vmx);
+
 	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 
@@ -6721,6 +6746,28 @@ void vmx_enable_tdp(void)
 	kvm_enable_tdp();
 }
 
+static void vmx_passthrough_lbr_msrs(void)
+{
+	int i;
+	struct perf_lbr_stack lbr_stack;
+
+	if (perf_get_lbr_stack(&lbr_stack) < 0) {
+		enable_lbrv = false;
+		return;
+	}
+
+	vmx_disable_intercept_for_msr(MSR_LBR_SELECT, false);
+	vmx_disable_intercept_for_msr(lbr_stack.lbr_tos, false);
+
+	for (i = 0; i < lbr_stack.lbr_nr; i++) {
+		vmx_disable_intercept_for_msr(lbr_stack.lbr_from + i, false);
+		vmx_disable_intercept_for_msr(lbr_stack.lbr_to + i, false);
+		if (lbr_stack.lbr_info)
+			vmx_disable_intercept_for_msr(lbr_stack.lbr_info + i,
+						      false);
+	}
+}
+
 static __init int hardware_setup(void)
 {
 	int r = -ENOMEM, i, msr;
@@ -6822,6 +6869,9 @@ static __init int hardware_setup(void)
 	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
 	vmx_disable_intercept_for_msr(MSR_IA32_DEBUGCTLMSR, false);
 
+	if (enable_lbrv)
+		vmx_passthrough_lbr_msrs();
+
 	memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
 			vmx_msr_bitmap_legacy, PAGE_SIZE);
 	memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
-- 
2.7.4

* Re: [PATCH v1 4/4] KVM/vmx: enable lbr for the guest
  2017-09-25  4:44 ` [PATCH v1 4/4] KVM/vmx: enable lbr for the guest Wei Wang
@ 2017-09-25  9:16   ` Paolo Bonzini
  2017-09-25 12:57     ` Wei Wang
  2017-09-25 14:57   ` Andi Kleen
  1 sibling, 1 reply; 18+ messages in thread
From: Paolo Bonzini @ 2017-09-25  9:16 UTC (permalink / raw)
  To: Wei Wang, virtualization, kvm, linux-kernel, mst, rkrcmar, ak,
	mingo

On 25/09/2017 06:44, Wei Wang wrote:
> Passthrough the LBR stack to the guest, and auto switch the stack MSRs
> upon VMEntry and VMExit.
> 
> Signed-off-by: Wei Wang <wei.w.wang@intel.com>

This has to be enabled separately for each guest, because it may prevent
live migration to hosts with a different family/model.

Paolo

* Re: [PATCH v1 1/4] KVM/vmx: re-write the msr auto switch feature
  2017-09-25  4:44 ` [PATCH v1 1/4] KVM/vmx: re-write the msr auto switch feature Wei Wang
@ 2017-09-25 11:54   ` Paolo Bonzini
  2017-09-25 13:02     ` Wei Wang
  0 siblings, 1 reply; 18+ messages in thread
From: Paolo Bonzini @ 2017-09-25 11:54 UTC (permalink / raw)
  To: Wei Wang, virtualization, kvm, linux-kernel, mst, rkrcmar, ak,
	mingo

On 25/09/2017 06:44, Wei Wang wrote:
>  
> +static void update_msr_autoload_count_max(void)
> +{
> +	u64 vmx_msr;
> +	int n;
> +
> +	/*
> +	 * According to the Intel SDM, if Bits 27:25 of MSR_IA32_VMX_MISC is
> +	 * n, then (n + 1) * 512 is the recommended max number of MSRs to be
> +	 * included in the VMExit and VMEntry MSR auto switch list.
> +	 */
> +	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
> +	n = ((vmx_msr & 0xe000000) >> 25) + 1;
> +	msr_autoload_count_max = n * KVM_VMX_DEFAULT_MSR_AUTO_LOAD_COUNT;
> +}
> +


Any reasons to do this if it's unlikely that we'll ever update more than
512 MSRs?

Paolo

* Re: [PATCH v1 2/4] KVM/vmx: auto switch MSR_IA32_DEBUGCTLMSR
  2017-09-25  4:44 ` [PATCH v1 2/4] KVM/vmx: auto switch MSR_IA32_DEBUGCTLMSR Wei Wang
@ 2017-09-25 11:57   ` Paolo Bonzini
  0 siblings, 0 replies; 18+ messages in thread
From: Paolo Bonzini @ 2017-09-25 11:57 UTC (permalink / raw)
  To: Wei Wang, virtualization, kvm, linux-kernel, mst, rkrcmar, ak,
	mingo

On 25/09/2017 06:44, Wei Wang wrote:
> Passthrough the MSR_IA32_DEBUGCTLMSR to the guest, and take advantage of
> the hardware VT-x feature to auto switch the msr upon VMExit and VMEntry.

I think most bits in the MSR should not be passed through (for example
FREEZE_WHILE_SMM_EN, FREEZE_LBRS_ON_PMI etc.).  Using auto-switch of
course is fine instead.

Paolo

* Re: [PATCH v1 4/4] KVM/vmx: enable lbr for the guest
  2017-09-25  9:16   ` Paolo Bonzini
@ 2017-09-25 12:57     ` Wei Wang
  0 siblings, 0 replies; 18+ messages in thread
From: Wei Wang @ 2017-09-25 12:57 UTC (permalink / raw)
  To: Paolo Bonzini, virtualization, kvm, linux-kernel, mst, rkrcmar,
	ak, mingo

On 09/25/2017 05:16 PM, Paolo Bonzini wrote:
> On 25/09/2017 06:44, Wei Wang wrote:
>> Passthrough the LBR stack to the guest, and auto switch the stack MSRs
>> upon VMEntry and VMExit.
>>
>> Signed-off-by: Wei Wang <wei.w.wang@intel.com>
> This has to be enabled separately for each guest, because it may prevent
> live migration to hosts with a different family/model.

Did you mean trapping MSR_IA32_DEBUGCTLMSR instead of passing it through?
In that case, we would also need to modify the kernel driver (i.e. the
PMI handler) to check MSR_IA32_DEBUGCTLMSR before reading the LBR MSRs.
The guest driver could then notice that the feature it is using has been
disabled after live migration.

This kind of live migration disables features that are in use. Would it
be common in real usage to migrate between different CPU models?

I think this issue isn't specific to the LBR feature. May I know how
other features would be handled in this case? Thanks.

On the other hand, an alternative approach that comes to mind is to do
some kind of feature negotiation at the very beginning of migration, and
fail live migration if the negotiation fails.


Best,
Wei

* Re: [PATCH v1 1/4] KVM/vmx: re-write the msr auto switch feature
  2017-09-25 11:54   ` Paolo Bonzini
@ 2017-09-25 13:02     ` Wei Wang
  2017-09-25 14:24       ` Paolo Bonzini
  0 siblings, 1 reply; 18+ messages in thread
From: Wei Wang @ 2017-09-25 13:02 UTC (permalink / raw)
  To: Paolo Bonzini, virtualization, kvm, linux-kernel, mst, rkrcmar,
	ak, mingo

On 09/25/2017 07:54 PM, Paolo Bonzini wrote:
> On 25/09/2017 06:44, Wei Wang wrote:
>>   
>> +static void update_msr_autoload_count_max(void)
>> +{
>> +	u64 vmx_msr;
>> +	int n;
>> +
>> +	/*
>> +	 * According to the Intel SDM, if Bits 27:25 of MSR_IA32_VMX_MISC is
>> +	 * n, then (n + 1) * 512 is the recommended max number of MSRs to be
>> +	 * included in the VMExit and VMEntry MSR auto switch list.
>> +	 */
>> +	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
>> +	n = ((vmx_msr & 0xe000000) >> 25) + 1;
>> +	msr_autoload_count_max = n * KVM_VMX_DEFAULT_MSR_AUTO_LOAD_COUNT;
>> +}
>> +
>
> Any reasons to do this if it's unlikely that we'll ever update more than
> 512 MSRs?
>
> Paolo

It isn't a must to allocate memory for 512 MSRs, but I think it would be
good to allocate memory for at least 128 MSRs, because on Skylake we
already have 32 triplets of MSRs (FROM/TO/INFO), which is 96 in total.

The disadvantage is that developers would need to manually calculate and
carefully change the number whenever they add new MSRs for auto switching
in the future. So, if consuming a little more memory isn't a concern, I
think we can directly allocate memory based on what the hardware
recommends.


Best,
Wei

* Re: [PATCH v1 1/4] KVM/vmx: re-write the msr auto switch feature
  2017-09-25 13:02     ` Wei Wang
@ 2017-09-25 14:24       ` Paolo Bonzini
  0 siblings, 0 replies; 18+ messages in thread
From: Paolo Bonzini @ 2017-09-25 14:24 UTC (permalink / raw)
  To: Wei Wang, virtualization, kvm, linux-kernel, mst, rkrcmar, ak,
	mingo

On 25/09/2017 15:02, Wei Wang wrote:
> On 09/25/2017 07:54 PM, Paolo Bonzini wrote:
>> On 25/09/2017 06:44, Wei Wang wrote:
>>>   +static void update_msr_autoload_count_max(void)
>>> +{
>>> +    u64 vmx_msr;
>>> +    int n;
>>> +
>>> +    /*
>>> +     * According to the Intel SDM, if Bits 27:25 of
>>> MSR_IA32_VMX_MISC is
>>> +     * n, then (n + 1) * 512 is the recommended max number of MSRs
>>> to be
>>> +     * included in the VMExit and VMEntry MSR auto switch list.
>>> +     */
>>> +    rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
>>> +    n = ((vmx_msr & 0xe000000) >> 25) + 1;
>>> +    msr_autoload_count_max = n * KVM_VMX_DEFAULT_MSR_AUTO_LOAD_COUNT;
>>> +}
>>> +
>>
>> Any reasons to do this if it's unlikely that we'll ever update more than
>> 512 MSRs?
>>
>> Paolo
> 
> It isn't a must to allocate memory for 512 MSRs, but I think it would
> be good to allocate memory at least for 128 MSRs, because on skylake
> we have 32 triplets of MSRs (FROM/TO/INFO), which are 96 in total
> already.
> 
> The disadvantage is that developers would need to manually calculate
> and change the number carefully when they need to add new MSRs for
> auto switching in the future. So, if consuming a little bit more
> memory isn't a concern, I think we can directly allocate memory based
> on what's recommended by the hardware.
Sure.  I was just thinking that it's unnecessary to actually use
VMX_MISC; sticking to one-page allocations is nicer.

Paolo

* Re: [PATCH v1 4/4] KVM/vmx: enable lbr for the guest
  2017-09-25  4:44 ` [PATCH v1 4/4] KVM/vmx: enable lbr for the guest Wei Wang
  2017-09-25  9:16   ` Paolo Bonzini
@ 2017-09-25 14:57   ` Andi Kleen
  2017-09-26  8:56     ` Wei Wang
  1 sibling, 1 reply; 18+ messages in thread
From: Andi Kleen @ 2017-09-25 14:57 UTC (permalink / raw)
  To: Wei Wang; +Cc: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, mingo

> +static void auto_switch_lbr_msrs(struct vcpu_vmx *vmx)
> +{
> +	int i;
> +	struct perf_lbr_stack lbr_stack;
> +
> +	perf_get_lbr_stack(&lbr_stack);
> +
> +	add_atomic_switch_msr(vmx, MSR_LBR_SELECT, 0, 0);
> +	add_atomic_switch_msr(vmx, lbr_stack.lbr_tos, 0, 0);
> +
> +	for (i = 0; i < lbr_stack.lbr_nr; i++) {
> +		add_atomic_switch_msr(vmx, lbr_stack.lbr_from + i, 0, 0);
> +		add_atomic_switch_msr(vmx, lbr_stack.lbr_to + i, 0, 0);
> +		if (lbr_stack.lbr_info)
> +			add_atomic_switch_msr(vmx, lbr_stack.lbr_info + i, 0,
> +					      0);
> +	}

That will be really expensive and add a lot of overhead to every entry/exit.
perf can already context switch the LBRs on task context switch. With that
you can just switch LBR_SELECT, which is *much* cheaper because there are
far fewer context switches than exits/entries.

It implies that when KVM is running it needs to prevent perf from enabling
LBRs in the context of KVM, but that should be straightforward.
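
In terms of this series, that would shrink the auto-switch list to
something like the sketch below (the helper name is illustrative, not
taken from the patches):

static void auto_switch_lbr_select_only(struct vcpu_vmx *vmx)
{
	/* perf is assumed to save/restore FROM/TO/INFO on task switch */
	add_atomic_switch_msr(vmx, MSR_IA32_DEBUGCTLMSR, 0, 0);
	add_atomic_switch_msr(vmx, MSR_LBR_SELECT, 0, 0);
}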

-Andi

* Re: [PATCH v1 0/4] Enable LBR for the guest
  2017-09-25  4:44 [PATCH v1 0/4] Enable LBR for the guest Wei Wang
                   ` (3 preceding siblings ...)
  2017-09-25  4:44 ` [PATCH v1 4/4] KVM/vmx: enable lbr for the guest Wei Wang
@ 2017-09-25 14:59 ` Andi Kleen
  2017-09-26  8:47   ` Wei Wang
  4 siblings, 1 reply; 18+ messages in thread
From: Andi Kleen @ 2017-09-25 14:59 UTC (permalink / raw)
  To: Wei Wang; +Cc: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, mingo

On Mon, Sep 25, 2017 at 12:44:52PM +0800, Wei Wang wrote:
> This patch series enables the Last Branch Recording feature for the
> guest. Instead of trapping each LBR stack MSR access, the MSRs are
> passthroughed to the guest. Those MSRs are switched (i.e. load and
> saved) on VMExit and VMEntry.
> 
> Test:
> Try "perf record -b ./test_program" on guest.

I don't see where you expose the PERF capabilities MSR? 

That's normally needed for LBR too to report the version
number.

-Andi

* Re: [PATCH v1 0/4] Enable LBR for the guest
  2017-09-25 14:59 ` [PATCH v1 0/4] Enable LBR " Andi Kleen
@ 2017-09-26  8:47   ` Wei Wang
  2017-09-26 16:51     ` Andi Kleen
  0 siblings, 1 reply; 18+ messages in thread
From: Wei Wang @ 2017-09-26  8:47 UTC (permalink / raw)
  To: Andi Kleen
  Cc: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, mingo

On 09/25/2017 10:59 PM, Andi Kleen wrote:
> On Mon, Sep 25, 2017 at 12:44:52PM +0800, Wei Wang wrote:
>> This patch series enables the Last Branch Recording feature for the
>> guest. Instead of trapping each LBR stack MSR access, the MSRs are
>> passthroughed to the guest. Those MSRs are switched (i.e. load and
>> saved) on VMExit and VMEntry.
>>
>> Test:
>> Try "perf record -b ./test_program" on guest.
> I don't see where you expose the PERF capabilities MSR?
>
> That's normally needed for LBR too to report the version
> number.
>

It was missed, thanks for pointing it out. I also found that KVM/QEMU
doesn't expose CPUID.PDCM; I will add that too.

Since we are only enabling LBR for now, I plan to expose just
"PERF_CAP & 0x3f" to the guest, which reports the LBR format field only.
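
A rough sketch of that masking (the helper below is hypothetical, not
something already in KVM):

static u64 vmx_guest_perf_capabilities(void)
{
	u64 host_cap;

	/* Expose only the LBR format field (bits 5:0) to the guest. */
	rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_cap);
	return host_cap & 0x3f;
}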

On the other hand, it seems that the (guest) kernel driver also works
without the above being supported. Should we change it to report an error
and stop using the PMU features when the check of the above two fails (at
intel_pmu_init())?


Best,
Wei

* Re: [PATCH v1 4/4] KVM/vmx: enable lbr for the guest
  2017-09-25 14:57   ` Andi Kleen
@ 2017-09-26  8:56     ` Wei Wang
  2017-09-26 16:41       ` Andi Kleen
  0 siblings, 1 reply; 18+ messages in thread
From: Wei Wang @ 2017-09-26  8:56 UTC (permalink / raw)
  To: Andi Kleen
  Cc: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, mingo

On 09/25/2017 10:57 PM, Andi Kleen wrote:
>> +static void auto_switch_lbr_msrs(struct vcpu_vmx *vmx)
>> +{
>> +	int i;
>> +	struct perf_lbr_stack lbr_stack;
>> +
>> +	perf_get_lbr_stack(&lbr_stack);
>> +
>> +	add_atomic_switch_msr(vmx, MSR_LBR_SELECT, 0, 0);
>> +	add_atomic_switch_msr(vmx, lbr_stack.lbr_tos, 0, 0);
>> +
>> +	for (i = 0; i < lbr_stack.lbr_nr; i++) {
>> +		add_atomic_switch_msr(vmx, lbr_stack.lbr_from + i, 0, 0);
>> +		add_atomic_switch_msr(vmx, lbr_stack.lbr_to + i, 0, 0);
>> +		if (lbr_stack.lbr_info)
>> +			add_atomic_switch_msr(vmx, lbr_stack.lbr_info + i, 0,
>> +					      0);
>> +	}
> That will be really expensive and add a lot of overhead to every entry/exit.
> perf can already context switch the LBRs on task context switch. With that
> you can just switch LBR_SELECT, which is *much* cheaper because there
> are far less context switches than exit/entries.
>
> It implies that when KVM is running it needs to prevent perf from enabling
> LBRs in the context of KVM, but that should be straight forward.

I kind of have a different thought here:

1) vCPU context switching and guest-side task switching are not identical.
That is, when the vCPU is scheduled out, the guest task on the vCPU may
not have used up its time slice yet, so the task will continue to run
when the vCPU is scheduled in again by the host (the LBRs were not saved
by the guest task when the vCPU was scheduled out in this case).

It is possible for the vCPU that runs the guest task (which is using the
LBRs) to be scheduled out, followed by a new host task being scheduled in
on the pCPU. It is not guaranteed that the new host task does not use the
LBR feature on the pCPU.

2) Sometimes, people may want this usage: "perf record -b
./qemu-system-x86_64 ...", which needs the LBRs to be used in KVM as well.


I think one possible optimization would be to add the LBR MSRs to the
auto-switch lists when the guest requests to enable the feature, and
remove them when it is disabled. This would require trapping guest
accesses to MSR_IA32_DEBUGCTLMSR, as sketched below.
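
A rough sketch of that direction (remove_lbr_msrs() is a hypothetical
counterpart to auto_switch_lbr_msrs() in patch 4; neither the helper nor
the hook point is part of this series):

static void vmx_update_lbr_autoswitch(struct vcpu_vmx *vmx, u64 guest_debugctl)
{
	/* Called from the (trapped) guest write to MSR_IA32_DEBUGCTLMSR. */
	if (guest_debugctl & DEBUGCTLMSR_LBR)
		auto_switch_lbr_msrs(vmx);
	else
		remove_lbr_msrs(vmx);
}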


Best,
Wei

* Re: [PATCH v1 4/4] KVM/vmx: enable lbr for the guest
  2017-09-26  8:56     ` Wei Wang
@ 2017-09-26 16:41       ` Andi Kleen
  2017-09-27  1:27         ` Wei Wang
  0 siblings, 1 reply; 18+ messages in thread
From: Andi Kleen @ 2017-09-26 16:41 UTC (permalink / raw)
  To: Wei Wang; +Cc: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, mingo

> 1) vCPU context switching and guest side task switching are not identical.
> That is, when the vCPU is scheduled out, the guest task on the vCPU may not

guest task lifetime has nothing to do with this. It's completely independent
of what you do here on the VCPU level.

> run out its time slice yet, so the task will continue to run when the vCPU
> is
> scheduled in by the host (lbr wasn't save by the guest task when the vCPU is
> scheduled out in this case).
> 
> It is possible to have the vCPU which runs the guest task (in use of lbr)
> scheduled
> out, followed by a new host task being scheduled in on the pCPU to run.
> It is not guaranteed that the new host task does not use the LBR feature on
> the
> pCPU.

Sure it may use the LBR, and the normal perf context switch
will switch it and everything works fine.

It's like any other per-task LBR user.

> 
> 2) Sometimes, people may want this usage: "perf record -b
> ./qemu-system-x86_64 ...",
> which will need lbr to be used in KVM as well.

In this obscure case you can disable LBR support for the guest.
The common case is far more important.

It sounds like you didn't do any performance measurements.
I expect the performance of your current solution to be terrible.

e.g. a normal perf PMI does at least 1 MSR read and 4+ MSR writes
for a single counter. With multiple counters it gets worse.

For each of those you'll need to exit. Adding something
to the entry/exit list is similar to the cost of doing 
explicit RD/WRMSRs.

On Skylake we have 32*3=96 MSRs for the LBRs.

So with the 5 exits and entries, you're essentially doing
5*2*96=960 extra MSR accesses for each PMI.

MSR access is 100+ cycles at least, for writes it is far more
expensive.

-Andi

* Re: [PATCH v1 0/4] Enable LBR for the guest
  2017-09-26  8:47   ` Wei Wang
@ 2017-09-26 16:51     ` Andi Kleen
  0 siblings, 0 replies; 18+ messages in thread
From: Andi Kleen @ 2017-09-26 16:51 UTC (permalink / raw)
  To: Wei Wang; +Cc: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, mingo

> On the other side, it seems that the (guest) kernel driver also works
> without
> the above being supported, should we change it to report error and stop
> using the PMU features when the check of the above two fails (at
> intel_pmu_init())?

You could add the extra check for the LBR code, yes, although it already
checks whether the LBR MSRs work, so in practice it's likely already covered.

-Andi

* Re: [PATCH v1 4/4] KVM/vmx: enable lbr for the guest
  2017-09-26 16:41       ` Andi Kleen
@ 2017-09-27  1:27         ` Wei Wang
  0 siblings, 0 replies; 18+ messages in thread
From: Wei Wang @ 2017-09-27  1:27 UTC (permalink / raw)
  To: Andi Kleen
  Cc: virtualization, kvm, linux-kernel, pbonzini, mst, rkrcmar, mingo

On 09/27/2017 12:41 AM, Andi Kleen wrote:
>> 1) vCPU context switching and guest side task switching are not identical.
>> That is, when the vCPU is scheduled out, the guest task on the vCPU may not
> guest task lifetime has nothing to do with this. It's completely independent
> of what you do here on the VCPU level.
>
>> run out its time slice yet, so the task will continue to run when the vCPU
>> is
>> scheduled in by the host (lbr wasn't save by the guest task when the vCPU is
>> scheduled out in this case).
>>
>> It is possible to have the vCPU which runs the guest task (in use of lbr)
>> scheduled
>> out, followed by a new host task being scheduled in on the pCPU to run.
>> It is not guaranteed that the new host task does not use the LBR feature on
>> the
>> pCPU.
> Sure it may use the LBR, and the normal perf context switch
> will switch it and everything works fine.
>
> It's like any other per-task LBR user.

OK, I see the point, thanks.

Why couldn't we save the LBR_SELECT via task switching too?


Best,
Wei
