[PATCH V1 0/4] KVM vPMU support for x86

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH V1 0/4] KVM vPMU support for x86
@ 2014-10-31 16:05 Wei Huang
  2014-10-31 16:05 ` [PATCH V1 1/4] KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch Wei Huang
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: Wei Huang @ 2014-10-31 16:05 UTC (permalink / raw)
  To: kvm; +Cc: pbonzini, gleb, rkrcmar, wei

Currently KVM only supports vPMU for Intel platforms. This patch set 
enable vPMU support for AMD platform by creating a common PMU
interface for x86. The PMU calls from guest VMs are dispatched
to corresponding functions defined in arch specific files.

V1:
  * Adopt the file layout suggested by Radim Krčmář
  * Link arch module with its specific PMU file

RFC:
  * Initial version for RFC

Wei Huang (4):
  KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch
  KVM: x86/vPMU: Convert pmu.c code into Intel specific code
  KVM: x86/vPMU: Implement AMD PMU support for KVM
  KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs

 arch/x86/include/asm/kvm_host.h |  52 ++--
 arch/x86/kvm/Makefile           |   6 +-
 arch/x86/kvm/cpuid.c            |   2 +-
 arch/x86/kvm/lapic.c            |   1 +
 arch/x86/kvm/pmu.c              | 576 ---------------------------------------
 arch/x86/kvm/pmu_amd.c          | 390 ++++++++++++++++++++++++++
 arch/x86/kvm/pmu_intel.c        | 586 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/svm.c              |   7 +
 arch/x86/kvm/vmx.c              |   7 +
 arch/x86/kvm/x86.c              |  74 ++---
 10 files changed, 1070 insertions(+), 631 deletions(-)
 delete mode 100644 arch/x86/kvm/pmu.c
 create mode 100644 arch/x86/kvm/pmu_amd.c
 create mode 100644 arch/x86/kvm/pmu_intel.c

-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH V1 1/4] KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch
  2014-10-31 16:05 [PATCH V1 0/4] KVM vPMU support for x86 Wei Huang
@ 2014-10-31 16:05 ` Wei Huang
  2014-10-31 16:05 ` [PATCH V1 2/4] KVM: x86/vPMU: Convert pmu.c code into Intel specific code Wei Huang
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 13+ messages in thread
From: Wei Huang @ 2014-10-31 16:05 UTC (permalink / raw)
  To: kvm; +Cc: pbonzini, gleb, rkrcmar, wei

This patch defines a new function pointer struct (kvm_pmu_ops)
to support vPMU for both Intel and AMD. The functions of this
new struct are well self-explaned by their names. In the
meanwhile the struct that maps from event_sel bits to
PERF_TYPE_HARDWARE events is moved from Intel specific code
to kvm_host.h as a common struct now.

Signed-off-by: Wei Huang <wei@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 35 +++++++++++++++++++++++++++++------
 arch/x86/kvm/pmu.c              | 21 ++++++++-------------
 2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30..e085cf8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -299,6 +299,12 @@ struct kvm_mmu {
 	u64 pdptrs[4]; /* pae */
 };
 
+struct msr_data {
+	bool host_initiated;
+	u32 index;
+	u64 data;
+};
+
 enum pmc_type {
 	KVM_PMC_GP = 0,
 	KVM_PMC_FIXED,
@@ -313,6 +319,29 @@ struct kvm_pmc {
 	struct kvm_vcpu *vcpu;
 };
 
+/* mapping from event selection to PERF_TYPE_HARDWARE events */
+struct kvm_event_hw_type_mapping {
+	u8 eventsel;
+	u8 unit_mask;
+	unsigned event_type;
+	bool inexact;
+};
+
+struct kvm_pmu_ops {
+	void (*pmu_cpuid_update)(struct kvm_vcpu *vcpu);
+	int (*pmu_check_pmc)(struct kvm_vcpu *vcpu, unsigned pmc);
+	int (*pmu_read_pmc)(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
+	int (*pmu_set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
+	int (*pmu_get_msr)(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+	bool (*is_pmu_msr)(struct kvm_vcpu *vcpu, u32 msr);
+	void (*pmu_deliver_pmi)(struct kvm_vcpu *vcpu);
+	void (*pmu_handle_event)(struct kvm_vcpu *vcpu);
+	void (*pmu_reset)(struct kvm_vcpu *vcpu);
+	void (*pmu_init)(struct kvm_vcpu *vcpu);
+	void (*pmu_destroy)(struct kvm_vcpu *vcpu);
+};
+
+/* used by both Intel and AMD; but some fields are only applicable to Intel. */
 struct kvm_pmu {
 	unsigned nr_arch_gp_counters;
 	unsigned nr_arch_fixed_counters;
@@ -653,12 +682,6 @@ struct kvm_vcpu_stat {
 
 struct x86_instruction_info;
 
-struct msr_data {
-	bool host_initiated;
-	u32 index;
-	u64 data;
-};
-
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 8e6b7d8..b8c7db7 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -20,12 +20,7 @@
 #include "cpuid.h"
 #include "lapic.h"
 
-static struct kvm_arch_event_perf_mapping {
-	u8 eventsel;
-	u8 unit_mask;
-	unsigned event_type;
-	bool inexact;
-} arch_events[] = {
+struct kvm_event_hw_type_mapping intel_arch_events[] = {
 	/* Index must match CPUID 0x0A.EBX bit vector */
 	[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
 	[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
@@ -37,7 +32,7 @@ static struct kvm_arch_event_perf_mapping {
 	[7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
 };
 
-/* mapping between fixed pmc index and arch_events array */
+/* mapping between fixed pmc index and intel_arch_events array */
 int fixed_pmc_events[] = {1, 0, 7};
 
 static bool pmc_is_gp(struct kvm_pmc *pmc)
@@ -202,16 +197,16 @@ static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(arch_events); i++)
-		if (arch_events[i].eventsel == event_select
-				&& arch_events[i].unit_mask == unit_mask
+	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
+		if (intel_arch_events[i].eventsel == event_select
+				&& intel_arch_events[i].unit_mask == unit_mask
 				&& (pmu->available_event_types & (1 << i)))
 			break;
 
-	if (i == ARRAY_SIZE(arch_events))
+	if (i == ARRAY_SIZE(intel_arch_events))
 		return PERF_COUNT_HW_MAX;
 
-	return arch_events[i].event_type;
+	return intel_arch_events[i].event_type;
 }
 
 static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
@@ -265,7 +260,7 @@ static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
 		return;
 
 	reprogram_counter(pmc, PERF_TYPE_HARDWARE,
-			arch_events[fixed_pmc_events[idx]].event_type,
+			intel_arch_events[fixed_pmc_events[idx]].event_type,
 			!(en & 0x2), /* exclude user */
 			!(en & 0x1), /* exclude kernel */
 			pmi, false, false);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH V1 2/4] KVM: x86/vPMU: Convert pmu.c code into Intel specific code
  2014-10-31 16:05 [PATCH V1 0/4] KVM vPMU support for x86 Wei Huang
  2014-10-31 16:05 ` [PATCH V1 1/4] KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch Wei Huang
@ 2014-10-31 16:05 ` Wei Huang
  2014-11-03 18:36   ` Radim Krčmář
  2014-10-31 16:05 ` [PATCH V1 3/4] KVM: x86/vPMU: Implement AMD PMU support for KVM Wei Huang
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: Wei Huang @ 2014-10-31 16:05 UTC (permalink / raw)
  To: kvm; +Cc: pbonzini, gleb, rkrcmar, wei

This patch converts existing pmu.c into Intel specific code and hooks
up with the PMU interface using the following steps:

- Convert pmu.c to pmu_intel.c; All public PMU functions are renamed
  and hooked up with the newly defined intel_pmu_ops.
- Create a corresponding pmu_amd.c file with empty functions for AMD
  arch.
- The PMU function pointer, kvm_pmu_ops, is initialized by calling
  kvm_x86_ops->get_pmu_ops().
- To reduce the code size, Intel and AMD modules are now generated
  from their corrponding arch and PMU files; In the meanwhile, due
  to this arrangement several, functions are exposed as public ones
  to allow calling from PMU code.

Signed-off-by: Wei Huang <wei@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  17 +-
 arch/x86/kvm/Makefile           |   6 +-
 arch/x86/kvm/cpuid.c            |   2 +-
 arch/x86/kvm/lapic.c            |   1 +
 arch/x86/kvm/pmu.c              | 571 ---------------------------------------
 arch/x86/kvm/pmu_amd.c          |  86 ++++++
 arch/x86/kvm/pmu_intel.c        | 586 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/svm.c              |   7 +
 arch/x86/kvm/vmx.c              |   7 +
 arch/x86/kvm/x86.c              |  40 +--
 10 files changed, 719 insertions(+), 604 deletions(-)
 delete mode 100644 arch/x86/kvm/pmu.c
 create mode 100644 arch/x86/kvm/pmu_amd.c
 create mode 100644 arch/x86/kvm/pmu_intel.c

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e085cf8..612a16a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -794,6 +794,8 @@ struct kvm_x86_ops {
 	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 
 	void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
+
+	struct kvm_pmu_ops *(*get_pmu_ops)(void);
 };
 
 struct kvm_arch_async_pf {
@@ -804,6 +806,7 @@ struct kvm_arch_async_pf {
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
+extern struct kvm_pmu_ops *kvm_pmu_ops;
 
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
 					   s64 adjustment)
@@ -1104,16 +1107,6 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
 
 int kvm_is_in_guest(void);
 
-void kvm_pmu_init(struct kvm_vcpu *vcpu);
-void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
-void kvm_pmu_reset(struct kvm_vcpu *vcpu);
-void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
-bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
-int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
-int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc);
-int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
-void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
-void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
-
+extern struct kvm_pmu_ops intel_pmu_ops;
+extern struct kvm_pmu_ops amd_pmu_ops;
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 25d22b2..1d3df15 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -14,9 +14,9 @@ kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o cpuid.o pmu.o
-kvm-intel-y		+= vmx.o
-kvm-amd-y		+= svm.o
+			   i8254.o cpuid.o
+kvm-intel-y		+= vmx.o pmu_intel.o
+kvm-amd-y		+= svm.o pmu_amd.o
 
 obj-$(CONFIG_KVM)	+= kvm.o
 obj-$(CONFIG_KVM_INTEL)	+= kvm-intel.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 976e3a5..d6529a9 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -97,7 +97,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 		((best->eax & 0xff00) >> 8) != 0)
 		return -EINVAL;
 
-	kvm_pmu_cpuid_update(vcpu);
+	kvm_pmu_ops->pmu_cpuid_update(vcpu);
 	return 0;
 }
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b8345dd..99d0edd 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1520,6 +1520,7 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 	}
 	return 0;
 }
+EXPORT_SYMBOL_GPL(kvm_apic_local_deliver);
 
 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
deleted file mode 100644
index b8c7db7..0000000
--- a/arch/x86/kvm/pmu.c
+++ /dev/null
@@ -1,571 +0,0 @@
-/*
- * Kernel-based Virtual Machine -- Performance Monitoring Unit support
- *
- * Copyright 2011 Red Hat, Inc. and/or its affiliates.
- *
- * Authors:
- *   Avi Kivity   <avi@redhat.com>
- *   Gleb Natapov <gleb@redhat.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include <linux/types.h>
-#include <linux/kvm_host.h>
-#include <linux/perf_event.h>
-#include <asm/perf_event.h>
-#include "x86.h"
-#include "cpuid.h"
-#include "lapic.h"
-
-struct kvm_event_hw_type_mapping intel_arch_events[] = {
-	/* Index must match CPUID 0x0A.EBX bit vector */
-	[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
-	[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
-	[2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES  },
-	[3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
-	[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
-	[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
-	[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
-	[7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
-};
-
-/* mapping between fixed pmc index and intel_arch_events array */
-int fixed_pmc_events[] = {1, 0, 7};
-
-static bool pmc_is_gp(struct kvm_pmc *pmc)
-{
-	return pmc->type == KVM_PMC_GP;
-}
-
-static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
-{
-	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
-
-	return pmu->counter_bitmask[pmc->type];
-}
-
-static inline bool pmc_enabled(struct kvm_pmc *pmc)
-{
-	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
-	return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
-}
-
-static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
-					 u32 base)
-{
-	if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
-		return &pmu->gp_counters[msr - base];
-	return NULL;
-}
-
-static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
-{
-	int base = MSR_CORE_PERF_FIXED_CTR0;
-	if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
-		return &pmu->fixed_counters[msr - base];
-	return NULL;
-}
-
-static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)
-{
-	return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx);
-}
-
-static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
-{
-	if (idx < INTEL_PMC_IDX_FIXED)
-		return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);
-	else
-		return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED);
-}
-
-void kvm_deliver_pmi(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->arch.apic)
-		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
-}
-
-static void trigger_pmi(struct irq_work *irq_work)
-{
-	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
-			irq_work);
-	struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
-			arch.pmu);
-
-	kvm_deliver_pmi(vcpu);
-}
-
-static void kvm_perf_overflow(struct perf_event *perf_event,
-			      struct perf_sample_data *data,
-			      struct pt_regs *regs)
-{
-	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
-	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
-	if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
-		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
-		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
-	}
-}
-
-static void kvm_perf_overflow_intr(struct perf_event *perf_event,
-		struct perf_sample_data *data, struct pt_regs *regs)
-{
-	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
-	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
-	if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
-		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
-		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
-		/*
-		 * Inject PMI. If vcpu was in a guest mode during NMI PMI
-		 * can be ejected on a guest mode re-entry. Otherwise we can't
-		 * be sure that vcpu wasn't executing hlt instruction at the
-		 * time of vmexit and is not going to re-enter guest mode until,
-		 * woken up. So we should wake it, but this is impossible from
-		 * NMI context. Do it from irq work instead.
-		 */
-		if (!kvm_is_in_guest())
-			irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
-		else
-			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
-	}
-}
-
-static u64 read_pmc(struct kvm_pmc *pmc)
-{
-	u64 counter, enabled, running;
-
-	counter = pmc->counter;
-
-	if (pmc->perf_event)
-		counter += perf_event_read_value(pmc->perf_event,
-						 &enabled, &running);
-
-	/* FIXME: Scaling needed? */
-
-	return counter & pmc_bitmask(pmc);
-}
-
-static void stop_counter(struct kvm_pmc *pmc)
-{
-	if (pmc->perf_event) {
-		pmc->counter = read_pmc(pmc);
-		perf_event_release_kernel(pmc->perf_event);
-		pmc->perf_event = NULL;
-	}
-}
-
-static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
-		unsigned config, bool exclude_user, bool exclude_kernel,
-		bool intr, bool in_tx, bool in_tx_cp)
-{
-	struct perf_event *event;
-	struct perf_event_attr attr = {
-		.type = type,
-		.size = sizeof(attr),
-		.pinned = true,
-		.exclude_idle = true,
-		.exclude_host = 1,
-		.exclude_user = exclude_user,
-		.exclude_kernel = exclude_kernel,
-		.config = config,
-	};
-	if (in_tx)
-		attr.config |= HSW_IN_TX;
-	if (in_tx_cp)
-		attr.config |= HSW_IN_TX_CHECKPOINTED;
-
-	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
-
-	event = perf_event_create_kernel_counter(&attr, -1, current,
-						 intr ? kvm_perf_overflow_intr :
-						 kvm_perf_overflow, pmc);
-	if (IS_ERR(event)) {
-		printk_once("kvm: pmu event creation failed %ld\n",
-				PTR_ERR(event));
-		return;
-	}
-
-	pmc->perf_event = event;
-	clear_bit(pmc->idx, (unsigned long*)&pmc->vcpu->arch.pmu.reprogram_pmi);
-}
-
-static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
-		u8 unit_mask)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
-		if (intel_arch_events[i].eventsel == event_select
-				&& intel_arch_events[i].unit_mask == unit_mask
-				&& (pmu->available_event_types & (1 << i)))
-			break;
-
-	if (i == ARRAY_SIZE(intel_arch_events))
-		return PERF_COUNT_HW_MAX;
-
-	return intel_arch_events[i].event_type;
-}
-
-static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
-{
-	unsigned config, type = PERF_TYPE_RAW;
-	u8 event_select, unit_mask;
-
-	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
-		printk_once("kvm pmu: pin control bit is ignored\n");
-
-	pmc->eventsel = eventsel;
-
-	stop_counter(pmc);
-
-	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
-		return;
-
-	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
-	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
-
-	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
-				ARCH_PERFMON_EVENTSEL_INV |
-				ARCH_PERFMON_EVENTSEL_CMASK |
-				HSW_IN_TX |
-				HSW_IN_TX_CHECKPOINTED))) {
-		config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
-				unit_mask);
-		if (config != PERF_COUNT_HW_MAX)
-			type = PERF_TYPE_HARDWARE;
-	}
-
-	if (type == PERF_TYPE_RAW)
-		config = eventsel & X86_RAW_EVENT_MASK;
-
-	reprogram_counter(pmc, type, config,
-			!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
-			!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
-			eventsel & ARCH_PERFMON_EVENTSEL_INT,
-			(eventsel & HSW_IN_TX),
-			(eventsel & HSW_IN_TX_CHECKPOINTED));
-}
-
-static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
-{
-	unsigned en = en_pmi & 0x3;
-	bool pmi = en_pmi & 0x8;
-
-	stop_counter(pmc);
-
-	if (!en || !pmc_enabled(pmc))
-		return;
-
-	reprogram_counter(pmc, PERF_TYPE_HARDWARE,
-			intel_arch_events[fixed_pmc_events[idx]].event_type,
-			!(en & 0x2), /* exclude user */
-			!(en & 0x1), /* exclude kernel */
-			pmi, false, false);
-}
-
-static inline u8 fixed_en_pmi(u64 ctrl, int idx)
-{
-	return (ctrl >> (idx * 4)) & 0xf;
-}
-
-static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
-{
-	int i;
-
-	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
-		u8 en_pmi = fixed_en_pmi(data, i);
-		struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i);
-
-		if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi)
-			continue;
-
-		reprogram_fixed_counter(pmc, en_pmi, i);
-	}
-
-	pmu->fixed_ctr_ctrl = data;
-}
-
-static void reprogram_idx(struct kvm_pmu *pmu, int idx)
-{
-	struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);
-
-	if (!pmc)
-		return;
-
-	if (pmc_is_gp(pmc))
-		reprogram_gp_counter(pmc, pmc->eventsel);
-	else {
-		int fidx = idx - INTEL_PMC_IDX_FIXED;
-		reprogram_fixed_counter(pmc,
-				fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);
-	}
-}
-
-static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
-{
-	int bit;
-	u64 diff = pmu->global_ctrl ^ data;
-
-	pmu->global_ctrl = data;
-
-	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
-		reprogram_idx(pmu, bit);
-}
-
-bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
-{
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	int ret;
-
-	switch (msr) {
-	case MSR_CORE_PERF_FIXED_CTR_CTRL:
-	case MSR_CORE_PERF_GLOBAL_STATUS:
-	case MSR_CORE_PERF_GLOBAL_CTRL:
-	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
-		ret = pmu->version > 1;
-		break;
-	default:
-		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)
-			|| get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0)
-			|| get_fixed_pmc(pmu, msr);
-		break;
-	}
-	return ret;
-}
-
-int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
-{
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	struct kvm_pmc *pmc;
-
-	switch (index) {
-	case MSR_CORE_PERF_FIXED_CTR_CTRL:
-		*data = pmu->fixed_ctr_ctrl;
-		return 0;
-	case MSR_CORE_PERF_GLOBAL_STATUS:
-		*data = pmu->global_status;
-		return 0;
-	case MSR_CORE_PERF_GLOBAL_CTRL:
-		*data = pmu->global_ctrl;
-		return 0;
-	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
-		*data = pmu->global_ovf_ctrl;
-		return 0;
-	default:
-		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
-				(pmc = get_fixed_pmc(pmu, index))) {
-			*data = read_pmc(pmc);
-			return 0;
-		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
-			*data = pmc->eventsel;
-			return 0;
-		}
-	}
-	return 1;
-}
-
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
-{
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	struct kvm_pmc *pmc;
-	u32 index = msr_info->index;
-	u64 data = msr_info->data;
-
-	switch (index) {
-	case MSR_CORE_PERF_FIXED_CTR_CTRL:
-		if (pmu->fixed_ctr_ctrl == data)
-			return 0;
-		if (!(data & 0xfffffffffffff444ull)) {
-			reprogram_fixed_counters(pmu, data);
-			return 0;
-		}
-		break;
-	case MSR_CORE_PERF_GLOBAL_STATUS:
-		if (msr_info->host_initiated) {
-			pmu->global_status = data;
-			return 0;
-		}
-		break; /* RO MSR */
-	case MSR_CORE_PERF_GLOBAL_CTRL:
-		if (pmu->global_ctrl == data)
-			return 0;
-		if (!(data & pmu->global_ctrl_mask)) {
-			global_ctrl_changed(pmu, data);
-			return 0;
-		}
-		break;
-	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
-		if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
-			if (!msr_info->host_initiated)
-				pmu->global_status &= ~data;
-			pmu->global_ovf_ctrl = data;
-			return 0;
-		}
-		break;
-	default:
-		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
-				(pmc = get_fixed_pmc(pmu, index))) {
-			if (!msr_info->host_initiated)
-				data = (s64)(s32)data;
-			pmc->counter += data - read_pmc(pmc);
-			return 0;
-		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
-			if (data == pmc->eventsel)
-				return 0;
-			if (!(data & pmu->reserved_bits)) {
-				reprogram_gp_counter(pmc, data);
-				return 0;
-			}
-		}
-	}
-	return 1;
-}
-
-int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
-{
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	bool fixed = pmc & (1u << 30);
-	pmc &= ~(3u << 30);
-	return (!fixed && pmc >= pmu->nr_arch_gp_counters) ||
-		(fixed && pmc >= pmu->nr_arch_fixed_counters);
-}
-
-int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
-{
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	bool fast_mode = pmc & (1u << 31);
-	bool fixed = pmc & (1u << 30);
-	struct kvm_pmc *counters;
-	u64 ctr;
-
-	pmc &= ~(3u << 30);
-	if (!fixed && pmc >= pmu->nr_arch_gp_counters)
-		return 1;
-	if (fixed && pmc >= pmu->nr_arch_fixed_counters)
-		return 1;
-	counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
-	ctr = read_pmc(&counters[pmc]);
-	if (fast_mode)
-		ctr = (u32)ctr;
-	*data = ctr;
-
-	return 0;
-}
-
-void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	struct kvm_cpuid_entry2 *entry;
-	union cpuid10_eax eax;
-	union cpuid10_edx edx;
-
-	pmu->nr_arch_gp_counters = 0;
-	pmu->nr_arch_fixed_counters = 0;
-	pmu->counter_bitmask[KVM_PMC_GP] = 0;
-	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
-	pmu->version = 0;
-	pmu->reserved_bits = 0xffffffff00200000ull;
-
-	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
-	if (!entry)
-		return;
-	eax.full = entry->eax;
-	edx.full = entry->edx;
-
-	pmu->version = eax.split.version_id;
-	if (!pmu->version)
-		return;
-
-	pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
-					INTEL_PMC_MAX_GENERIC);
-	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
-	pmu->available_event_types = ~entry->ebx &
-					((1ull << eax.split.mask_length) - 1);
-
-	if (pmu->version == 1) {
-		pmu->nr_arch_fixed_counters = 0;
-	} else {
-		pmu->nr_arch_fixed_counters =
-			min_t(int, edx.split.num_counters_fixed,
-				INTEL_PMC_MAX_FIXED);
-		pmu->counter_bitmask[KVM_PMC_FIXED] =
-			((u64)1 << edx.split.bit_width_fixed) - 1;
-	}
-
-	pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
-		(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
-	pmu->global_ctrl_mask = ~pmu->global_ctrl;
-
-	entry = kvm_find_cpuid_entry(vcpu, 7, 0);
-	if (entry &&
-	    (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
-	    (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
-		pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
-}
-
-void kvm_pmu_init(struct kvm_vcpu *vcpu)
-{
-	int i;
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-
-	memset(pmu, 0, sizeof(*pmu));
-	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
-		pmu->gp_counters[i].type = KVM_PMC_GP;
-		pmu->gp_counters[i].vcpu = vcpu;
-		pmu->gp_counters[i].idx = i;
-	}
-	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
-		pmu->fixed_counters[i].type = KVM_PMC_FIXED;
-		pmu->fixed_counters[i].vcpu = vcpu;
-		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
-	}
-	init_irq_work(&pmu->irq_work, trigger_pmi);
-	kvm_pmu_cpuid_update(vcpu);
-}
-
-void kvm_pmu_reset(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	int i;
-
-	irq_work_sync(&pmu->irq_work);
-	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
-		struct kvm_pmc *pmc = &pmu->gp_counters[i];
-		stop_counter(pmc);
-		pmc->counter = pmc->eventsel = 0;
-	}
-
-	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
-		stop_counter(&pmu->fixed_counters[i]);
-
-	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
-		pmu->global_ovf_ctrl = 0;
-}
-
-void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
-{
-	kvm_pmu_reset(vcpu);
-}
-
-void kvm_handle_pmu_event(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	u64 bitmask;
-	int bit;
-
-	bitmask = pmu->reprogram_pmi;
-
-	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
-		struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);
-
-		if (unlikely(!pmc || !pmc->perf_event)) {
-			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
-			continue;
-		}
-
-		reprogram_idx(pmu, bit);
-	}
-}
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
new file mode 100644
index 0000000..6d6e1c3
--- /dev/null
+++ b/arch/x86/kvm/pmu_amd.c
@@ -0,0 +1,86 @@
+/*
+ * KVM -- Performance Monitoring Unit support for AMD
+ *
+ * Copyright 2014, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ *   Wei Huang    <wei@redhat.com>
+ *
+ * Implementation is based on pmu_intel.c file
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include <linux/perf_event.h>
+#include <asm/perf_event.h>
+#include "x86.h"
+#include "cpuid.h"
+#include "lapic.h"
+
+void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)
+{
+}
+
+int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
+{
+	return 1;
+}
+
+int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
+{
+	return 1;
+}
+
+int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	return 1;
+}
+
+int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+{
+	return 1;
+}
+
+bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
+{
+	return 0;
+}
+
+void amd_deliver_pmi(struct kvm_vcpu *vcpu)
+{
+}
+
+void amd_handle_pmu_event(struct kvm_vcpu *vcpu)
+{
+}
+
+void amd_pmu_reset(struct kvm_vcpu *vcpu)
+{
+}
+
+void amd_pmu_init(struct kvm_vcpu *vcpu)
+{
+}
+
+void amd_pmu_destroy(struct kvm_vcpu *vcpu)
+{
+}
+
+struct kvm_pmu_ops amd_pmu_ops = {
+	.pmu_cpuid_update = amd_pmu_cpuid_update,
+	.pmu_check_pmc = amd_pmu_check_pmc,
+	.pmu_read_pmc = amd_pmu_read_pmc,
+	.pmu_set_msr = amd_pmu_set_msr,
+	.pmu_get_msr = amd_pmu_get_msr,
+	.is_pmu_msr = amd_is_pmu_msr,
+	.pmu_deliver_pmi = amd_deliver_pmi,
+	.pmu_handle_event = amd_handle_pmu_event,
+	.pmu_reset = amd_pmu_reset,
+	.pmu_init = amd_pmu_init,
+	.pmu_destroy = amd_pmu_destroy,
+};
+EXPORT_SYMBOL_GPL(amd_pmu_ops);
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
new file mode 100644
index 0000000..1326c91
--- /dev/null
+++ b/arch/x86/kvm/pmu_intel.c
@@ -0,0 +1,586 @@
+/*
+ * KVM -- Performance Monitoring Unit support for Intel
+ *
+ * Copyright 2011 - 2014, Red Hat, Inc. and/or its affiliates.
+ *
+ * Authors:
+ *   Avi Kivity   <avi@redhat.com>
+ *   Gleb Natapov <gleb@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include <linux/perf_event.h>
+#include <asm/perf_event.h>
+#include "x86.h"
+#include "cpuid.h"
+#include "lapic.h"
+
+struct kvm_event_hw_type_mapping intel_arch_events[] = {
+	/* Index must match CPUID 0x0A.EBX bit vector */
+	[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
+	[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
+	[2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES  },
+	[3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
+	[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
+	[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+	[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+	[7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
+};
+
+/* mapping between fixed pmc index and intel_arch_events array */
+int fixed_pmc_events[] = {1, 0, 7};
+
+static bool pmc_is_gp(struct kvm_pmc *pmc)
+{
+	return pmc->type == KVM_PMC_GP;
+}
+
+static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
+{
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+
+	return pmu->counter_bitmask[pmc->type];
+}
+
+static inline bool pmc_enabled(struct kvm_pmc *pmc)
+{
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+	return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
+}
+
+static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
+					 u32 base)
+{
+	if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
+		return &pmu->gp_counters[msr - base];
+	return NULL;
+}
+
+static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
+{
+	int base = MSR_CORE_PERF_FIXED_CTR0;
+	if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
+		return &pmu->fixed_counters[msr - base];
+	return NULL;
+}
+
+static inline struct kvm_pmc *get_fixed_pmc_idx(struct kvm_pmu *pmu, int idx)
+{
+	return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx);
+}
+
+static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
+{
+	if (idx < INTEL_PMC_IDX_FIXED)
+		return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);
+	else
+		return get_fixed_pmc_idx(pmu, idx - INTEL_PMC_IDX_FIXED);
+}
+
+void intel_deliver_pmi(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.apic)
+		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
+}
+
+static void trigger_pmi(struct irq_work *irq_work)
+{
+	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
+			irq_work);
+	struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
+			arch.pmu);
+
+	intel_deliver_pmi(vcpu);
+}
+
+static void kvm_perf_overflow(struct perf_event *perf_event,
+			      struct perf_sample_data *data,
+			      struct pt_regs *regs)
+{
+	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+	if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
+		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+	}
+}
+
+static void kvm_perf_overflow_intr(struct perf_event *perf_event,
+		struct perf_sample_data *data, struct pt_regs *regs)
+{
+	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+	if (!test_and_set_bit(pmc->idx, (unsigned long *)&pmu->reprogram_pmi)) {
+		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
+		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+		/*
+		 * Inject PMI. If vcpu was in a guest mode during NMI PMI
+		 * can be ejected on a guest mode re-entry. Otherwise we can't
+		 * be sure that vcpu wasn't executing hlt instruction at the
+		 * time of vmexit and is not going to re-enter guest mode until,
+		 * woken up. So we should wake it, but this is impossible from
+		 * NMI context. Do it from irq work instead.
+		 */
+		if (!kvm_is_in_guest())
+			irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
+		else
+			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
+	}
+}
+
+static u64 read_pmc(struct kvm_pmc *pmc)
+{
+	u64 counter, enabled, running;
+
+	counter = pmc->counter;
+
+	if (pmc->perf_event)
+		counter += perf_event_read_value(pmc->perf_event,
+						 &enabled, &running);
+
+	/* FIXME: Scaling needed? */
+
+	return counter & pmc_bitmask(pmc);
+}
+
+static void stop_counter(struct kvm_pmc *pmc)
+{
+	if (pmc->perf_event) {
+		pmc->counter = read_pmc(pmc);
+		perf_event_release_kernel(pmc->perf_event);
+		pmc->perf_event = NULL;
+	}
+}
+
+static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
+		unsigned config, bool exclude_user, bool exclude_kernel,
+		bool intr, bool in_tx, bool in_tx_cp)
+{
+	struct perf_event *event;
+	struct perf_event_attr attr = {
+		.type = type,
+		.size = sizeof(attr),
+		.pinned = true,
+		.exclude_idle = true,
+		.exclude_host = 1,
+		.exclude_user = exclude_user,
+		.exclude_kernel = exclude_kernel,
+		.config = config,
+	};
+	if (in_tx)
+		attr.config |= HSW_IN_TX;
+	if (in_tx_cp)
+		attr.config |= HSW_IN_TX_CHECKPOINTED;
+
+	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
+
+	event = perf_event_create_kernel_counter(&attr, -1, current,
+						 intr ? kvm_perf_overflow_intr :
+						 kvm_perf_overflow, pmc);
+	if (IS_ERR(event)) {
+		printk_once("kvm: pmu event creation failed %ld\n",
+				PTR_ERR(event));
+		return;
+	}
+
+	pmc->perf_event = event;
+	clear_bit(pmc->idx, (unsigned long*)&pmc->vcpu->arch.pmu.reprogram_pmi);
+}
+
+static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
+		u8 unit_mask)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
+		if (intel_arch_events[i].eventsel == event_select
+				&& intel_arch_events[i].unit_mask == unit_mask
+				&& (pmu->available_event_types & (1 << i)))
+			break;
+
+	if (i == ARRAY_SIZE(intel_arch_events))
+		return PERF_COUNT_HW_MAX;
+
+	return intel_arch_events[i].event_type;
+}
+
+static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
+{
+	unsigned config, type = PERF_TYPE_RAW;
+	u8 event_select, unit_mask;
+
+	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
+		printk_once("kvm pmu: pin control bit is ignored\n");
+
+	pmc->eventsel = eventsel;
+
+	stop_counter(pmc);
+
+	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
+		return;
+
+	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
+	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+
+	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
+				ARCH_PERFMON_EVENTSEL_INV |
+				ARCH_PERFMON_EVENTSEL_CMASK |
+				HSW_IN_TX |
+				HSW_IN_TX_CHECKPOINTED))) {
+		config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
+				unit_mask);
+		if (config != PERF_COUNT_HW_MAX)
+			type = PERF_TYPE_HARDWARE;
+	}
+
+	if (type == PERF_TYPE_RAW)
+		config = eventsel & X86_RAW_EVENT_MASK;
+
+	reprogram_counter(pmc, type, config,
+			!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+			!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+			eventsel & ARCH_PERFMON_EVENTSEL_INT,
+			(eventsel & HSW_IN_TX),
+			(eventsel & HSW_IN_TX_CHECKPOINTED));
+}
+
+static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
+{
+	unsigned en = en_pmi & 0x3;
+	bool pmi = en_pmi & 0x8;
+
+	stop_counter(pmc);
+
+	if (!en || !pmc_enabled(pmc))
+		return;
+
+	reprogram_counter(pmc, PERF_TYPE_HARDWARE,
+			intel_arch_events[fixed_pmc_events[idx]].event_type,
+			!(en & 0x2), /* exclude user */
+			!(en & 0x1), /* exclude kernel */
+			pmi, false, false);
+}
+
+static inline u8 fixed_en_pmi(u64 ctrl, int idx)
+{
+	return (ctrl >> (idx * 4)) & 0xf;
+}
+
+static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
+{
+	int i;
+
+	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
+		u8 en_pmi = fixed_en_pmi(data, i);
+		struct kvm_pmc *pmc = get_fixed_pmc_idx(pmu, i);
+
+		if (fixed_en_pmi(pmu->fixed_ctr_ctrl, i) == en_pmi)
+			continue;
+
+		reprogram_fixed_counter(pmc, en_pmi, i);
+	}
+
+	pmu->fixed_ctr_ctrl = data;
+}
+
+static void reprogram_idx(struct kvm_pmu *pmu, int idx)
+{
+	struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);
+
+	if (!pmc)
+		return;
+
+	if (pmc_is_gp(pmc))
+		reprogram_gp_counter(pmc, pmc->eventsel);
+	else {
+		int fidx = idx - INTEL_PMC_IDX_FIXED;
+		reprogram_fixed_counter(pmc,
+				fixed_en_pmi(pmu->fixed_ctr_ctrl, fidx), fidx);
+	}
+}
+
+static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
+{
+	int bit;
+	u64 diff = pmu->global_ctrl ^ data;
+
+	pmu->global_ctrl = data;
+
+	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
+		reprogram_idx(pmu, bit);
+}
+
+bool intel_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	int ret;
+
+	switch (msr) {
+	case MSR_CORE_PERF_FIXED_CTR_CTRL:
+	case MSR_CORE_PERF_GLOBAL_STATUS:
+	case MSR_CORE_PERF_GLOBAL_CTRL:
+	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+		ret = pmu->version > 1;
+		break;
+	default:
+		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)
+			|| get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0)
+			|| get_fixed_pmc(pmu, msr);
+		break;
+	}
+	return ret;
+}
+
+int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *pmc;
+
+	switch (index) {
+	case MSR_CORE_PERF_FIXED_CTR_CTRL:
+		*data = pmu->fixed_ctr_ctrl;
+		return 0;
+	case MSR_CORE_PERF_GLOBAL_STATUS:
+		*data = pmu->global_status;
+		return 0;
+	case MSR_CORE_PERF_GLOBAL_CTRL:
+		*data = pmu->global_ctrl;
+		return 0;
+	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+		*data = pmu->global_ovf_ctrl;
+		return 0;
+	default:
+		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
+				(pmc = get_fixed_pmc(pmu, index))) {
+			*data = read_pmc(pmc);
+			return 0;
+		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
+			*data = pmc->eventsel;
+			return 0;
+		}
+	}
+	return 1;
+}
+
+int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *pmc;
+	u32 index = msr_info->index;
+	u64 data = msr_info->data;
+
+	switch (index) {
+	case MSR_CORE_PERF_FIXED_CTR_CTRL:
+		if (pmu->fixed_ctr_ctrl == data)
+			return 0;
+		if (!(data & 0xfffffffffffff444ull)) {
+			reprogram_fixed_counters(pmu, data);
+			return 0;
+		}
+		break;
+	case MSR_CORE_PERF_GLOBAL_STATUS:
+		if (msr_info->host_initiated) {
+			pmu->global_status = data;
+			return 0;
+		}
+		break; /* RO MSR */
+	case MSR_CORE_PERF_GLOBAL_CTRL:
+		if (pmu->global_ctrl == data)
+			return 0;
+		if (!(data & pmu->global_ctrl_mask)) {
+			global_ctrl_changed(pmu, data);
+			return 0;
+		}
+		break;
+	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+		if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
+			if (!msr_info->host_initiated)
+				pmu->global_status &= ~data;
+			pmu->global_ovf_ctrl = data;
+			return 0;
+		}
+		break;
+	default:
+		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
+				(pmc = get_fixed_pmc(pmu, index))) {
+			if (!msr_info->host_initiated)
+				data = (s64)(s32)data;
+			pmc->counter += data - read_pmc(pmc);
+			return 0;
+		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
+			if (data == pmc->eventsel)
+				return 0;
+			if (!(data & pmu->reserved_bits)) {
+				reprogram_gp_counter(pmc, data);
+				return 0;
+			}
+		}
+	}
+	return 1;
+}
+
+int intel_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	bool fixed = pmc & (1u << 30);
+	pmc &= ~(3u << 30);
+	return (!fixed && pmc >= pmu->nr_arch_gp_counters) ||
+		(fixed && pmc >= pmu->nr_arch_fixed_counters);
+}
+
+int intel_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	bool fast_mode = pmc & (1u << 31);
+	bool fixed = pmc & (1u << 30);
+	struct kvm_pmc *counters;
+	u64 ctr;
+
+	pmc &= ~(3u << 30);
+	if (!fixed && pmc >= pmu->nr_arch_gp_counters)
+		return 1;
+	if (fixed && pmc >= pmu->nr_arch_fixed_counters)
+		return 1;
+	counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
+	ctr = read_pmc(&counters[pmc]);
+	if (fast_mode)
+		ctr = (u32)ctr;
+	*data = ctr;
+
+	return 0;
+}
+
+void intel_pmu_cpuid_update(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_cpuid_entry2 *entry;
+	union cpuid10_eax eax;
+	union cpuid10_edx edx;
+
+	pmu->nr_arch_gp_counters = 0;
+	pmu->nr_arch_fixed_counters = 0;
+	pmu->counter_bitmask[KVM_PMC_GP] = 0;
+	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+	pmu->version = 0;
+	pmu->reserved_bits = 0xffffffff00200000ull;
+
+	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+	if (!entry)
+		return;
+	eax.full = entry->eax;
+	edx.full = entry->edx;
+
+	pmu->version = eax.split.version_id;
+	if (!pmu->version)
+		return;
+
+	pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
+					INTEL_PMC_MAX_GENERIC);
+	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
+	pmu->available_event_types = ~entry->ebx &
+					((1ull << eax.split.mask_length) - 1);
+
+	if (pmu->version == 1) {
+		pmu->nr_arch_fixed_counters = 0;
+	} else {
+		pmu->nr_arch_fixed_counters =
+			min_t(int, edx.split.num_counters_fixed,
+				INTEL_PMC_MAX_FIXED);
+		pmu->counter_bitmask[KVM_PMC_FIXED] =
+			((u64)1 << edx.split.bit_width_fixed) - 1;
+	}
+
+	pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
+		(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
+	pmu->global_ctrl_mask = ~pmu->global_ctrl;
+
+	entry = kvm_find_cpuid_entry(vcpu, 7, 0);
+	if (entry &&
+	    (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
+	    (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
+		pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
+}
+
+void intel_pmu_init(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+
+	memset(pmu, 0, sizeof(*pmu));
+	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
+		pmu->gp_counters[i].type = KVM_PMC_GP;
+		pmu->gp_counters[i].vcpu = vcpu;
+		pmu->gp_counters[i].idx = i;
+	}
+	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+		pmu->fixed_counters[i].type = KVM_PMC_FIXED;
+		pmu->fixed_counters[i].vcpu = vcpu;
+		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
+	}
+	init_irq_work(&pmu->irq_work, trigger_pmi);
+	intel_pmu_cpuid_update(vcpu);
+}
+
+void intel_pmu_reset(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	int i;
+
+	irq_work_sync(&pmu->irq_work);
+	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
+		struct kvm_pmc *pmc = &pmu->gp_counters[i];
+		stop_counter(pmc);
+		pmc->counter = pmc->eventsel = 0;
+	}
+
+	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
+		stop_counter(&pmu->fixed_counters[i]);
+
+	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
+		pmu->global_ovf_ctrl = 0;
+}
+
+void intel_pmu_destroy(struct kvm_vcpu *vcpu)
+{
+	intel_pmu_reset(vcpu);
+}
+
+void intel_handle_pmu_event(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	u64 bitmask;
+	int bit;
+
+	bitmask = pmu->reprogram_pmi;
+
+	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
+		struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);
+
+		if (unlikely(!pmc || !pmc->perf_event)) {
+			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
+			continue;
+		}
+
+		reprogram_idx(pmu, bit);
+	}
+}
+
+struct kvm_pmu_ops intel_pmu_ops = {
+	.pmu_cpuid_update = intel_pmu_cpuid_update,
+	.pmu_check_pmc = intel_pmu_check_pmc,
+	.pmu_read_pmc = intel_pmu_read_pmc,
+	.pmu_set_msr = intel_pmu_set_msr,
+	.pmu_get_msr = intel_pmu_get_msr,
+	.is_pmu_msr = intel_is_pmu_msr,
+	.pmu_deliver_pmi = intel_deliver_pmi,
+	.pmu_handle_event = intel_handle_pmu_event,
+	.pmu_reset = intel_pmu_reset,
+	.pmu_init = intel_pmu_init,
+	.pmu_destroy = intel_pmu_destroy,
+};
+EXPORT_SYMBOL_GPL(intel_pmu_ops);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7527cef..9fb6b37 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4326,6 +4326,11 @@ static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
 }
 
+struct kvm_pmu_ops *svm_get_pmu_ops(void)
+{
+	return &amd_pmu_ops;
+}
+
 static struct kvm_x86_ops svm_x86_ops = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
@@ -4428,6 +4433,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.handle_external_intr = svm_handle_external_intr,
 
 	.sched_in = svm_sched_in,
+
+	.get_pmu_ops = svm_get_pmu_ops,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a0f78db..de43bf0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9073,6 +9073,11 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 		shrink_ple_window(vcpu);
 }
 
+static struct kvm_pmu_ops *vmx_get_pmu_ops(void)
+{
+	return &intel_pmu_ops;
+}
+
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -9180,6 +9185,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.check_nested_events = vmx_check_nested_events,
 
 	.sched_in = vmx_sched_in,
+
+	.get_pmu_ops = vmx_get_pmu_ops,
 };
 
 static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0033df3..e4ed76e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -91,6 +91,7 @@ static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 
 struct kvm_x86_ops *kvm_x86_ops;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
+struct kvm_pmu_ops *kvm_pmu_ops;
 
 static bool ignore_msrs = 0;
 module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
@@ -893,7 +894,7 @@ bool kvm_rdpmc(struct kvm_vcpu *vcpu)
 	u64 data;
 	int err;
 
-	err = kvm_pmu_read_pmc(vcpu, ecx, &data);
+	err = kvm_pmu_ops->pmu_read_pmc(vcpu, ecx, &data);
 	if (err)
 		return err;
 	kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
@@ -2248,8 +2249,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		pr = true;
 	case MSR_P6_EVNTSEL0:
 	case MSR_P6_EVNTSEL1:
-		if (kvm_pmu_msr(vcpu, msr))
-			return kvm_pmu_set_msr(vcpu, msr_info);
+		if (kvm_pmu_ops->is_pmu_msr(vcpu, msr))
+			return kvm_pmu_ops->pmu_set_msr(vcpu, msr_info);
 
 		if (pr || data != 0)
 			vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
@@ -2294,8 +2295,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	default:
 		if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
 			return xen_hvm_config(vcpu, data);
-		if (kvm_pmu_msr(vcpu, msr))
-			return kvm_pmu_set_msr(vcpu, msr_info);
+		if (kvm_pmu_ops->is_pmu_msr(vcpu, msr))
+			return kvm_pmu_ops->pmu_set_msr(vcpu, msr_info);
 		if (!ignore_msrs) {
 			vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
 				    msr, data);
@@ -2487,8 +2488,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_P6_PERFCTR1:
 	case MSR_P6_EVNTSEL0:
 	case MSR_P6_EVNTSEL1:
-		if (kvm_pmu_msr(vcpu, msr))
-			return kvm_pmu_get_msr(vcpu, msr, pdata);
+		if (kvm_pmu_ops->is_pmu_msr(vcpu, msr))
+			return kvm_pmu_ops->pmu_get_msr(vcpu, msr, pdata);
 		data = 0;
 		break;
 	case MSR_IA32_UCODE_REV:
@@ -2610,8 +2611,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		data = vcpu->arch.osvw.status;
 		break;
 	default:
-		if (kvm_pmu_msr(vcpu, msr))
-			return kvm_pmu_get_msr(vcpu, msr, pdata);
+		if (kvm_pmu_ops->is_pmu_msr(vcpu, msr))
+			return kvm_pmu_ops->pmu_get_msr(vcpu, msr, pdata);
 		if (!ignore_msrs) {
 			vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
 			return 1;
@@ -4848,19 +4849,19 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
 	msr.data = data;
 	msr.index = msr_index;
 	msr.host_initiated = false;
-	return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
+	return kvm_pmu_ops->pmu_set_msr(emul_to_vcpu(ctxt), &msr);
 }
 
 static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
 			      u32 pmc)
 {
-	return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc);
+	return kvm_pmu_ops->pmu_check_pmc(emul_to_vcpu(ctxt), pmc);
 }
 
 static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
 			     u32 pmc, u64 *pdata)
 {
-	return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
+	return kvm_pmu_ops->pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
 }
 
 static void emulator_halt(struct x86_emulate_ctxt *ctxt)
@@ -5557,6 +5558,7 @@ int kvm_is_in_guest(void)
 {
 	return __this_cpu_read(current_vcpu) != NULL;
 }
+EXPORT_SYMBOL_GPL(kvm_is_in_guest);
 
 static int kvm_is_user_mode(void)
 {
@@ -5713,6 +5715,9 @@ int kvm_arch_init(void *opaque)
 
 	kvm_timer_init();
 
+	/* init pointers of PMU operations */
+	kvm_pmu_ops = ops->get_pmu_ops();
+
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
 
 	if (cpu_has_xsave)
@@ -5743,6 +5748,7 @@ void kvm_arch_exit(void)
 	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
 #endif
 	kvm_x86_ops = NULL;
+	kvm_pmu_ops = NULL;
 	kvm_mmu_module_exit();
 	free_percpu(shared_msrs);
 }
@@ -6143,9 +6149,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		if (kvm_check_request(KVM_REQ_NMI, vcpu))
 			process_nmi(vcpu);
 		if (kvm_check_request(KVM_REQ_PMU, vcpu))
-			kvm_handle_pmu_event(vcpu);
+			kvm_pmu_ops->pmu_handle_event(vcpu);
 		if (kvm_check_request(KVM_REQ_PMI, vcpu))
-			kvm_deliver_pmi(vcpu);
+			kvm_pmu_ops->pmu_deliver_pmi(vcpu);
 		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
 			vcpu_scan_ioapic(vcpu);
 		if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
@@ -7015,7 +7021,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 	kvm_async_pf_hash_reset(vcpu);
 	vcpu->arch.apf.halted = false;
 
-	kvm_pmu_reset(vcpu);
+	kvm_pmu_ops->pmu_reset(vcpu);
 
 	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
 	vcpu->arch.regs_avail = ~0;
@@ -7214,7 +7220,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
 
 	kvm_async_pf_hash_reset(vcpu);
-	kvm_pmu_init(vcpu);
+	kvm_pmu_ops->pmu_init(vcpu);
 
 	return 0;
 fail_free_wbinvd_dirty_mask:
@@ -7235,7 +7241,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 	int idx;
 
-	kvm_pmu_destroy(vcpu);
+	kvm_pmu_ops->pmu_destroy(vcpu);
 	kfree(vcpu->arch.mce_banks);
 	kvm_free_lapic(vcpu);
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH V1 3/4] KVM: x86/vPMU: Implement AMD PMU support for KVM
  2014-10-31 16:05 [PATCH V1 0/4] KVM vPMU support for x86 Wei Huang
  2014-10-31 16:05 ` [PATCH V1 1/4] KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch Wei Huang
  2014-10-31 16:05 ` [PATCH V1 2/4] KVM: x86/vPMU: Convert pmu.c code into Intel specific code Wei Huang
@ 2014-10-31 16:05 ` Wei Huang
  2014-11-03 18:17   ` Radim Krčmář
  2014-10-31 16:05 ` [PATCH V1 4/4] KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs Wei Huang
  2014-11-03 17:56 ` [PATCH V1 0/4] KVM vPMU support for x86 Radim Krčmář
  4 siblings, 1 reply; 13+ messages in thread
From: Wei Huang @ 2014-10-31 16:05 UTC (permalink / raw)
  To: kvm; +Cc: pbonzini, gleb, rkrcmar, wei

This patch implemented vPMU for AMD platform. The design piggybacks
on the existing Intel structs (kvm_pmu and kvm_pmc), but only uses
the parts of generic counters. The kvm_pmu_ops interface is also
initialized in this patch.

Signed-off-by: Wei Huang <wei@redhat.com>
---
 arch/x86/kvm/pmu_amd.c | 332 ++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 318 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index 6d6e1c3..19cfe26 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -16,58 +16,362 @@
 #include <linux/types.h>
 #include <linux/kvm_host.h>
 #include <linux/perf_event.h>
-#include <asm/perf_event.h>
 #include "x86.h"
 #include "cpuid.h"
 #include "lapic.h"
 
-void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)
+/* duplicated from amd_perfmon_event_map, K7 and above should work */
+static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
+	[0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
+	[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
+	[2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES },
+	[3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES },
+	[4] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+	[5] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+	[6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
+	[7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
+};
+
+static inline bool pmc_is_gp(struct kvm_pmc *pmc)
 {
+	return pmc->type == KVM_PMC_GP;
 }
 
-int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
+static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
+					 u32 base)
 {
-	return 1;
+	if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
+		return &pmu->gp_counters[msr - base];
+
+	return NULL;
 }
 
-int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
+static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
 {
-	return 1;
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+
+	return pmu->counter_bitmask[pmc->type];
 }
 
-int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
 {
-	return 1;
+	return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + idx, MSR_K7_EVNTSEL0);
 }
 
-int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+void amd_deliver_pmi(struct kvm_vcpu *vcpu)
 {
-	return 1;
+	if (vcpu->arch.apic)
+		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
 }
 
-bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
+static void trigger_pmi(struct irq_work *irq_work)
 {
-	return 0;
+	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
+			irq_work);
+	struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
+			arch.pmu);
+
+	amd_deliver_pmi(vcpu);
 }
 
-void amd_deliver_pmi(struct kvm_vcpu *vcpu)
+static u64 read_pmc(struct kvm_pmc *pmc)
+{
+	u64 counter, enabled, running, result, delta, prev;
+
+	counter = pmc->counter;
+	prev = pmc->counter;
+
+	if (pmc->perf_event) {
+		delta = perf_event_read_value(pmc->perf_event,
+					      &enabled, &running);
+		counter += delta;
+	}
+
+	result = counter & pmc_bitmask(pmc);
+
+	return result;
+}
+
+static void stop_counter(struct kvm_pmc *pmc)
+{
+	if (pmc->perf_event) {
+		pmc->counter = read_pmc(pmc);
+		perf_event_release_kernel(pmc->perf_event);
+		pmc->perf_event = NULL;
+	}
+}
+
+static unsigned find_hw_type_event(struct kvm_pmu *pmu, u8 event_select,
+				u8 unit_mask)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
+		if (amd_event_mapping[i].eventsel == event_select
+		    && amd_event_mapping[i].unit_mask == unit_mask)
+			break;
+
+	if (i == ARRAY_SIZE(amd_event_mapping))
+		return PERF_COUNT_HW_MAX;
+
+	return amd_event_mapping[i].event_type;
+}
+
+static void kvm_perf_overflow_intr(struct perf_event *perf_event,
+		struct perf_sample_data *data, struct pt_regs *regs)
+{
+	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+
+	if (!test_and_set_bit(pmc->idx,
+			      (unsigned long *)&pmu->reprogram_pmi)) {
+		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+
+		if (!kvm_is_in_guest())
+			irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
+		else
+			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
+	}
+}
+
+static void kvm_perf_overflow(struct perf_event *perf_event,
+			      struct perf_sample_data *data,
+			      struct pt_regs *regs)
+{
+	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+
+	if (!test_and_set_bit(pmc->idx,
+			      (unsigned long *)&pmu->reprogram_pmi)) {
+		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+	}
+}
+
+static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
+		unsigned config, bool exclude_user, bool exclude_kernel,
+		bool intr)
+{
+	struct perf_event *event;
+	struct perf_event_attr attr = {
+		.type = type,
+		.size = sizeof(attr),
+		.pinned = true,
+		.exclude_idle = true,
+		.exclude_host = 1,
+		.exclude_user = exclude_user,
+		.exclude_kernel = exclude_kernel,
+		.config = config,
+	};
+
+	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
+
+	event = perf_event_create_kernel_counter(&attr, -1, current,
+						 intr?kvm_perf_overflow_intr :
+						 kvm_perf_overflow, pmc);
+	if (IS_ERR(event)) {
+		printk_once("kvm: pmu event creation failed %ld\n",
+			    PTR_ERR(event));
+		return;
+	}
+
+	pmc->perf_event = event;
+	clear_bit(pmc->idx, (unsigned long *)&pmc->vcpu->arch.pmu.reprogram_pmi);
+}
+
+
+static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
+{
+	unsigned config, type = PERF_TYPE_RAW;
+	u8 event_select, unit_mask;
+
+	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
+		printk_once("kvm pmu: pin control bit is ignored\n");
+
+	pmc->eventsel = eventsel;
+
+	stop_counter(pmc);
+
+	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE))
+		return;
+
+	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
+	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+
+	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
+			  ARCH_PERFMON_EVENTSEL_INV |
+			  ARCH_PERFMON_EVENTSEL_CMASK |
+			  HSW_IN_TX |
+			  HSW_IN_TX_CHECKPOINTED))) {
+		config = find_hw_type_event(&pmc->vcpu->arch.pmu, event_select,
+					 unit_mask);
+		if (config != PERF_COUNT_HW_MAX)
+			type = PERF_TYPE_HARDWARE;
+	}
+
+	if (type == PERF_TYPE_RAW)
+		config = eventsel & X86_RAW_EVENT_MASK;
+
+	reprogram_counter(pmc, type, config,
+			  !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+			  !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+			  eventsel & ARCH_PERFMON_EVENTSEL_INT);
+}
+
+static void reprogram_idx(struct kvm_pmu *pmu, int idx)
 {
+	struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);
+
+	if (!pmc || !pmc_is_gp(pmc))
+		return;
+	reprogram_gp_counter(pmc, pmc->eventsel);
 }
 
 void amd_handle_pmu_event(struct kvm_vcpu *vcpu)
 {
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	u64 bitmask;
+	int bit;
+
+	bitmask = pmu->reprogram_pmi;
+
+	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
+		struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);
+
+		if (unlikely(!pmc || !pmc->perf_event)) {
+			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
+			continue;
+		}
+
+		reprogram_idx(pmu, bit);
+	}
 }
 
 void amd_pmu_reset(struct kvm_vcpu *vcpu)
 {
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	int i;
+
+	irq_work_sync(&pmu->irq_work);
+
+	for (i = 0; i < AMD64_NUM_COUNTERS; i++) {
+		struct kvm_pmc *pmc = &pmu->gp_counters[i];
+
+		stop_counter(pmc);
+		pmc->counter = pmc->eventsel = 0;
+	}
+}
+
+void amd_pmu_destroy(struct kvm_vcpu *vcpu)
+{
+	amd_pmu_reset(vcpu);
+}
+
+int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+
+	pmc &= ~(3u << 30);
+	return (pmc >= pmu->nr_arch_gp_counters);
+}
+
+int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *counters;
+	u64 ctr;
+
+	pmc &= ~(3u << 30);
+	if (pmc >= pmu->nr_arch_gp_counters)
+		return 1;
+
+	counters = pmu->gp_counters;
+	ctr = read_pmc(&counters[pmc]);
+	*data = ctr;
+
+	return 0;
+}
+
+int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *pmc;
+	u32 index = msr_info->index;
+	u64 data = msr_info->data;
+
+	if ((pmc = get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) {
+		if (!msr_info->host_initiated)
+			data = (s64)data;
+		pmc->counter += data - read_pmc(pmc);
+		return 0;
+	} else if ((pmc = get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) {
+		if (data == pmc->eventsel)
+			return 0;
+		if (!(data & pmu->reserved_bits)) {
+			reprogram_gp_counter(pmc, data);
+			return 0;
+		}
+	}
+
+	return 1;
+}
+
+int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *pmc;
+
+	if ((pmc = get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) {
+		*data = read_pmc(pmc);
+		return 0;
+	} else if ((pmc = get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) {
+		*data = pmc->eventsel;
+		return 0;
+	}
+
+	return 1;
+}
+
+void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+
+	pmu->nr_arch_gp_counters = 0;
+	pmu->nr_arch_fixed_counters = 0;
+	pmu->counter_bitmask[KVM_PMC_GP] = 0;
+	pmu->version = 0;
+	pmu->reserved_bits = 0xffffffff00200000ull;
+
+	/* configuration */
+	pmu->nr_arch_gp_counters = 4;
+	pmu->nr_arch_fixed_counters = 0;
+	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
 }
 
 void amd_pmu_init(struct kvm_vcpu *vcpu)
 {
+	int i;
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+
+	memset(pmu, 0, sizeof(*pmu));
+	for (i = 0; i < AMD64_NUM_COUNTERS ; i++) {
+		pmu->gp_counters[i].type = KVM_PMC_GP;
+		pmu->gp_counters[i].vcpu = vcpu;
+		pmu->gp_counters[i].idx = i;
+	}
+
+	init_irq_work(&pmu->irq_work, trigger_pmi);
+	amd_pmu_cpuid_update(vcpu);
 }
 
-void amd_pmu_destroy(struct kvm_vcpu *vcpu)
+bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
 {
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	int ret;
+
+	ret = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) ||
+		get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
+
+	return ret;
 }
 
 struct kvm_pmu_ops amd_pmu_ops = {
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH V1 4/4] KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs
  2014-10-31 16:05 [PATCH V1 0/4] KVM vPMU support for x86 Wei Huang
                   ` (2 preceding siblings ...)
  2014-10-31 16:05 ` [PATCH V1 3/4] KVM: x86/vPMU: Implement AMD PMU support for KVM Wei Huang
@ 2014-10-31 16:05 ` Wei Huang
  2014-11-03 17:56 ` [PATCH V1 0/4] KVM vPMU support for x86 Radim Krčmář
  4 siblings, 0 replies; 13+ messages in thread
From: Wei Huang @ 2014-10-31 16:05 UTC (permalink / raw)
  To: kvm; +Cc: pbonzini, gleb, rkrcmar, wei

This patch enables PMU handling (read/write) for AMD performance
counters, whic include PERFCTR[0..3] and EVNTSEL[0..3].

Signed-off-by: Wei Huang <wei@redhat.com>
---
 arch/x86/kvm/x86.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e4ed76e..fb445d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2226,24 +2226,22 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	 * which we perfectly emulate ;-). Any other value should be at least
 	 * reported, some guests depend on them.
 	 */
+	case MSR_K7_PERFCTR0:
+	case MSR_K7_PERFCTR1:
+	case MSR_K7_PERFCTR2:
+	case MSR_K7_PERFCTR3:
+		pr = true;
 	case MSR_K7_EVNTSEL0:
 	case MSR_K7_EVNTSEL1:
 	case MSR_K7_EVNTSEL2:
 	case MSR_K7_EVNTSEL3:
-		if (data != 0)
-			vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
+		if (kvm_pmu_ops->is_pmu_msr(vcpu, msr))
+			return kvm_pmu_ops->pmu_set_msr(vcpu, msr_info);
+
+		if (pr || data != 0)
+			vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
 				    "0x%x data 0x%llx\n", msr, data);
 		break;
-	/* at least RHEL 4 unconditionally writes to the perfctr registers,
-	 * so we ignore writes to make it happy.
-	 */
-	case MSR_K7_PERFCTR0:
-	case MSR_K7_PERFCTR1:
-	case MSR_K7_PERFCTR2:
-	case MSR_K7_PERFCTR3:
-		vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
-			    "0x%x data 0x%llx\n", msr, data);
-		break;
 	case MSR_P6_PERFCTR0:
 	case MSR_P6_PERFCTR1:
 		pr = true;
@@ -2470,6 +2468,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_K8_SYSCFG:
 	case MSR_K7_HWCR:
 	case MSR_VM_HSAVE_PA:
+	case MSR_K8_INT_PENDING_MSG:
+	case MSR_AMD64_NB_CFG:
+	case MSR_FAM10H_MMIO_CONF_BASE:
+	case MSR_AMD64_BU_CFG2:
+		data = 0;
+		break;
 	case MSR_K7_EVNTSEL0:
 	case MSR_K7_EVNTSEL1:
 	case MSR_K7_EVNTSEL2:
@@ -2478,10 +2482,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_K7_PERFCTR1:
 	case MSR_K7_PERFCTR2:
 	case MSR_K7_PERFCTR3:
-	case MSR_K8_INT_PENDING_MSG:
-	case MSR_AMD64_NB_CFG:
-	case MSR_FAM10H_MMIO_CONF_BASE:
-	case MSR_AMD64_BU_CFG2:
+		if (kvm_pmu_ops->is_pmu_msr(vcpu, msr))
+			return kvm_pmu_ops->pmu_get_msr(vcpu, msr, pdata);
 		data = 0;
 		break;
 	case MSR_P6_PERFCTR0:
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH V1 0/4] KVM vPMU support for x86
  2014-10-31 16:05 [PATCH V1 0/4] KVM vPMU support for x86 Wei Huang
                   ` (3 preceding siblings ...)
  2014-10-31 16:05 ` [PATCH V1 4/4] KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs Wei Huang
@ 2014-11-03 17:56 ` Radim Krčmář
  2014-11-03 18:23   ` Wei Huang
  4 siblings, 1 reply; 13+ messages in thread
From: Radim Krčmář @ 2014-11-03 17:56 UTC (permalink / raw)
  To: Wei Huang; +Cc: kvm, pbonzini, gleb

2014-10-31 12:05-0400, Wei Huang:
> Currently KVM only supports vPMU for Intel platforms. This patch set 
> enable vPMU support for AMD platform by creating a common PMU
> interface for x86. The PMU calls from guest VMs are dispatched
> to corresponding functions defined in arch specific files.

The functionality looks good, so I just want verify the basic design:
why don't we emulate AMD PMU on Intel, and vice versa?
(Underlying PERF_COUNTs are identical in both.)

> V1:
>   * Adopt the file layout suggested by Radim Krčmář

(I'll still advocate for more separation, sorry.)

>   * Link arch module with its specific PMU file
> 
> RFC:
>   * Initial version for RFC
> 
> Wei Huang (4):
>   KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch
>   KVM: x86/vPMU: Convert pmu.c code into Intel specific code
>   KVM: x86/vPMU: Implement AMD PMU support for KVM
>   KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs
> 
>  arch/x86/include/asm/kvm_host.h |  52 ++--
>  arch/x86/kvm/Makefile           |   6 +-
>  arch/x86/kvm/cpuid.c            |   2 +-
>  arch/x86/kvm/lapic.c            |   1 +
>  arch/x86/kvm/pmu.c              | 576 ---------------------------------------
>  arch/x86/kvm/pmu_amd.c          | 390 ++++++++++++++++++++++++++
>  arch/x86/kvm/pmu_intel.c        | 586 ++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/svm.c              |   7 +
>  arch/x86/kvm/vmx.c              |   7 +
>  arch/x86/kvm/x86.c              |  74 ++---
>  10 files changed, 1070 insertions(+), 631 deletions(-)
>  delete mode 100644 arch/x86/kvm/pmu.c
>  create mode 100644 arch/x86/kvm/pmu_amd.c
>  create mode 100644 arch/x86/kvm/pmu_intel.c
> 
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V1 3/4] KVM: x86/vPMU: Implement AMD PMU support for KVM
  2014-10-31 16:05 ` [PATCH V1 3/4] KVM: x86/vPMU: Implement AMD PMU support for KVM Wei Huang
@ 2014-11-03 18:17   ` Radim Krčmář
  2014-11-04 18:20     ` Wei Huang
  0 siblings, 1 reply; 13+ messages in thread
From: Radim Krčmář @ 2014-11-03 18:17 UTC (permalink / raw)
  To: Wei Huang; +Cc: kvm, pbonzini, gleb

2014-10-31 12:05-0400, Wei Huang:
> This patch implemented vPMU for AMD platform. The design piggybacks
> on the existing Intel structs (kvm_pmu and kvm_pmc), but only uses
> the parts of generic counters. The kvm_pmu_ops interface is also
> initialized in this patch.
> 
> Signed-off-by: Wei Huang <wei@redhat.com>
> ---
>  arch/x86/kvm/pmu_amd.c | 332 ++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 318 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
> index 6d6e1c3..19cfe26 100644
> --- a/arch/x86/kvm/pmu_amd.c
> +++ b/arch/x86/kvm/pmu_amd.c
> @@ -16,58 +16,362 @@
>  #include <linux/types.h>
>  #include <linux/kvm_host.h>
>  #include <linux/perf_event.h>
> -#include <asm/perf_event.h>
>  #include "x86.h"
>  #include "cpuid.h"
>  #include "lapic.h"
>  
> -void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)
> +/* duplicated from amd_perfmon_event_map, K7 and above should work */
> +static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
> +	[0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
> +	[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
> +	[2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES },
> +	[3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES },

> +	[4] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
> +	[5] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },

amd_perfmon_event_map has 0xc2 and 0xc3.

> +	[6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
> +	[7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
> +};
> +
> +static inline bool pmc_is_gp(struct kvm_pmc *pmc)
>  {
> +	return pmc->type == KVM_PMC_GP;
>  }

What is the benefit of having the same implementation in both files?

I'd prefer to have it in pmu.h and I'll try to note all functions that
look identical.  As always, you can ignore anything in parentheses,
there are only few real issues with this patch.

>  
> -int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
> +static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
> +					 u32 base)

(pmu.h)

>  {
> -	return 1;
> +	if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
> +		return &pmu->gp_counters[msr - base];
> +
> +	return NULL;
>  }
>  
> -int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
> +static inline u64 pmc_bitmask(struct kvm_pmc *pmc)

(pmu.h)

>  {
> -	return 1;
> +	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
> +
> +	return pmu->counter_bitmask[pmc->type];
>  }
>  
> -int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> +static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
>  {
> -	return 1;
> +	return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + idx, MSR_K7_EVNTSEL0);
>  }
>  
> -int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
> +void amd_deliver_pmi(struct kvm_vcpu *vcpu)

static.

(pmu.c, rename to deliver_pmi and reuse for intel.)

>  {
> -	return 1;
> +	if (vcpu->arch.apic)
> +		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
>  }
>  
> -bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
> +static void trigger_pmi(struct irq_work *irq_work)

(pmu.c)

>  {
> -	return 0;
> +	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
> +			irq_work);
> +	struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
> +			arch.pmu);
> +
> +	amd_deliver_pmi(vcpu);
>  }
>  
> -void amd_deliver_pmi(struct kvm_vcpu *vcpu)
> +static u64 read_pmc(struct kvm_pmc *pmc)

(pmu.c)

> +{
> +	u64 counter, enabled, running, result, delta, prev;
> +
> +	counter = pmc->counter;
> +	prev = pmc->counter;
> +
> +	if (pmc->perf_event) {
> +		delta = perf_event_read_value(pmc->perf_event,
> +					      &enabled, &running);
> +		counter += delta;
> +	}
> +
> +	result = counter & pmc_bitmask(pmc);
> +
> +	return result;
> +}

No.  The one intel uses is better.
(This one looks like a relic from experimenting.)

> +
> +static void stop_counter(struct kvm_pmc *pmc)

(pmu.c)

> +{
> +	if (pmc->perf_event) {
> +		pmc->counter = read_pmc(pmc);
> +		perf_event_release_kernel(pmc->perf_event);
> +		pmc->perf_event = NULL;
> +	}
> +}
> +
> +static unsigned find_hw_type_event(struct kvm_pmu *pmu, u8 event_select,
> +				u8 unit_mask)
> +{

(Intel calls this find_arch_event.)

> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
> +		if (amd_event_mapping[i].eventsel == event_select
> +		    && amd_event_mapping[i].unit_mask == unit_mask)

(And it could be less extensible, but shorter:
  			return amd_event_mapping[i].event_type;
  	}
  	return PERF_COUNT_HW_MAX;
  }
)

> +			break;
> +
> +	if (i == ARRAY_SIZE(amd_event_mapping))
> +		return PERF_COUNT_HW_MAX;
> +
> +	return amd_event_mapping[i].event_type;
> +}
> +
> +static void kvm_perf_overflow_intr(struct perf_event *perf_event,
> +		struct perf_sample_data *data, struct pt_regs *regs)
> +{

(pmu.c)

> +	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
> +	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
> +
> +	if (!test_and_set_bit(pmc->idx,
> +			      (unsigned long *)&pmu->reprogram_pmi)) {

(The useless set_bit for intel is not that bad to keep them separate;
 please mind my radicality when it comes to abstracting.)

> +		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
> +
> +		if (!kvm_is_in_guest())
> +			irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
> +		else
> +			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
> +	}
> +}
> +
> +static void kvm_perf_overflow(struct perf_event *perf_event,
> +			      struct perf_sample_data *data,
> +			      struct pt_regs *regs)
> +{

(pmu.c, ditto.)

> +	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
> +	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
> +
> +	if (!test_and_set_bit(pmc->idx,
> +			      (unsigned long *)&pmu->reprogram_pmi)) {
> +		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
> +	}
> +}
> +
> +static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
> +		unsigned config, bool exclude_user, bool exclude_kernel,
> +		bool intr)

(Could be merged in my world, I'll comment on previous patch.)

> +{
> +	struct perf_event *event;
> +	struct perf_event_attr attr = {
> +		.type = type,
> +		.size = sizeof(attr),
> +		.pinned = true,
> +		.exclude_idle = true,
> +		.exclude_host = 1,
> +		.exclude_user = exclude_user,
> +		.exclude_kernel = exclude_kernel,
> +		.config = config,
> +	};
> +
> +	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
> +
> +	event = perf_event_create_kernel_counter(&attr, -1, current,
> +						 intr?kvm_perf_overflow_intr :
> +						 kvm_perf_overflow, pmc);
> +	if (IS_ERR(event)) {
> +		printk_once("kvm: pmu event creation failed %ld\n",
> +			    PTR_ERR(event));
> +		return;
> +	}
> +
> +	pmc->perf_event = event;
> +	clear_bit(pmc->idx, (unsigned long *)&pmc->vcpu->arch.pmu.reprogram_pmi);
> +}
> +
> +
> +static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
> +{

(Ditto.)

> +	unsigned config, type = PERF_TYPE_RAW;
> +	u8 event_select, unit_mask;
> +
> +	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
> +		printk_once("kvm pmu: pin control bit is ignored\n");
> +
> +	pmc->eventsel = eventsel;
> +
> +	stop_counter(pmc);
> +
> +	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE))
> +		return;
> +
> +	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
> +	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
> +
> +	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
> +			  ARCH_PERFMON_EVENTSEL_INV |
> +			  ARCH_PERFMON_EVENTSEL_CMASK |
> +			  HSW_IN_TX |
> +			  HSW_IN_TX_CHECKPOINTED))) {
> +		config = find_hw_type_event(&pmc->vcpu->arch.pmu, event_select,
> +					 unit_mask);
> +		if (config != PERF_COUNT_HW_MAX)
> +			type = PERF_TYPE_HARDWARE;
> +	}
> +
> +	if (type == PERF_TYPE_RAW)
> +		config = eventsel & X86_RAW_EVENT_MASK;
> +
> +	reprogram_counter(pmc, type, config,
> +			  !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
> +			  !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
> +			  eventsel & ARCH_PERFMON_EVENTSEL_INT);
> +}
> +
> +static void reprogram_idx(struct kvm_pmu *pmu, int idx)
>  {
> +	struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);
> +
> +	if (!pmc || !pmc_is_gp(pmc))
> +		return;
> +	reprogram_gp_counter(pmc, pmc->eventsel);
>  }
>  
>  void amd_handle_pmu_event(struct kvm_vcpu *vcpu)
>  {

static.

(pmu.c)

> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +	u64 bitmask;
> +	int bit;
> +
> +	bitmask = pmu->reprogram_pmi;
> +
> +	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
> +		struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);
> +
> +		if (unlikely(!pmc || !pmc->perf_event)) {
> +			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
> +			continue;
> +		}
> +
> +		reprogram_idx(pmu, bit);
> +	}
>  }
>  
>  void amd_pmu_reset(struct kvm_vcpu *vcpu)

static.

>  {
> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +	int i;
> +
> +	irq_work_sync(&pmu->irq_work);
> +
> +	for (i = 0; i < AMD64_NUM_COUNTERS; i++) {
> +		struct kvm_pmc *pmc = &pmu->gp_counters[i];
> +
> +		stop_counter(pmc);
> +		pmc->counter = pmc->eventsel = 0;
> +	}
> +}
> +
> +void amd_pmu_destroy(struct kvm_vcpu *vcpu)

static.

> +{
> +	amd_pmu_reset(vcpu);
> +}
> +
> +int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)

static.

> +{
> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +
> +	pmc &= ~(3u << 30);

(-1u >> 2.)

> +	return (pmc >= pmu->nr_arch_gp_counters);
> +}
> +
> +int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)

static.

> +{
> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +	struct kvm_pmc *counters;
> +	u64 ctr;
> +
> +	pmc &= ~(3u << 30);
> +	if (pmc >= pmu->nr_arch_gp_counters)
> +		return 1;
> +
> +	counters = pmu->gp_counters;
> +	ctr = read_pmc(&counters[pmc]);
> +	*data = ctr;
> +
> +	return 0;
> +}
> +
> +int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)

static.

> +{
> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +	struct kvm_pmc *pmc;
> +	u32 index = msr_info->index;
> +	u64 data = msr_info->data;
> +
> +	if ((pmc = get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) {
> +		if (!msr_info->host_initiated)
> +			data = (s64)data;
> +		pmc->counter += data - read_pmc(pmc);
> +		return 0;
> +	} else if ((pmc = get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) {
> +		if (data == pmc->eventsel)
> +			return 0;
> +		if (!(data & pmu->reserved_bits)) {
> +			reprogram_gp_counter(pmc, data);
> +			return 0;
> +		}
> +	}
> +
> +	return 1;
> +}
> +
> +int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)

static.

> +{
> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +	struct kvm_pmc *pmc;
> +
> +	if ((pmc = get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) {
> +		*data = read_pmc(pmc);
> +		return 0;
> +	} else if ((pmc = get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) {
> +		*data = pmc->eventsel;
> +		return 0;
> +	}
> +
> +	return 1;
> +}
> +
> +void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)

static.

> +{
> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +
> +	pmu->nr_arch_gp_counters = 0;
> +	pmu->nr_arch_fixed_counters = 0;
> +	pmu->counter_bitmask[KVM_PMC_GP] = 0;
> +	pmu->version = 0;
> +	pmu->reserved_bits = 0xffffffff00200000ull;
> +

> +	/* configuration */
> +	pmu->nr_arch_gp_counters = 4;

AMD64_NUM_COUNTERS?

> +	pmu->nr_arch_fixed_counters = 0;
> +	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;

amd_pmu_cpuid_update doesn't depend on cpuid at all.
(I'd keep this function empty.)

>  }
>  
>  void amd_pmu_init(struct kvm_vcpu *vcpu)

static.

>  {
> +	int i;
> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +
> +	memset(pmu, 0, sizeof(*pmu));
> +	for (i = 0; i < AMD64_NUM_COUNTERS ; i++) {
> +		pmu->gp_counters[i].type = KVM_PMC_GP;
> +		pmu->gp_counters[i].vcpu = vcpu;
> +		pmu->gp_counters[i].idx = i;
> +	}
> +
> +	init_irq_work(&pmu->irq_work, trigger_pmi);
> +	amd_pmu_cpuid_update(vcpu);
>  }
>  
> -void amd_pmu_destroy(struct kvm_vcpu *vcpu)
> +bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)

static.

>  {
> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +	int ret;
> +
> +	ret = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) ||
> +		get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
> +
> +	return ret;
>  }
>  
>  struct kvm_pmu_ops amd_pmu_ops = {
> -- 
> 1.8.3.1
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V1 0/4] KVM vPMU support for x86
  2014-11-03 17:56 ` [PATCH V1 0/4] KVM vPMU support for x86 Radim Krčmář
@ 2014-11-03 18:23   ` Wei Huang
  2014-11-03 18:39     ` Radim Krčmář
  0 siblings, 1 reply; 13+ messages in thread
From: Wei Huang @ 2014-11-03 18:23 UTC (permalink / raw)
  To: Radim Krčmář; +Cc: kvm, pbonzini, gleb



On 11/03/2014 11:56 AM, Radim Krčmář wrote:
> 2014-10-31 12:05-0400, Wei Huang:
>> Currently KVM only supports vPMU for Intel platforms. This patch set 
>> enable vPMU support for AMD platform by creating a common PMU
>> interface for x86. The PMU calls from guest VMs are dispatched
>> to corresponding functions defined in arch specific files.
> 
> The functionality looks good, so I just want verify the basic design:
> why don't we emulate AMD PMU on Intel, and vice versa?
> (Underlying PERF_COUNTs are identical in both.)

Thanks. The underlining perf counters can be very different between AMD
and Intel. I think we can emulate AMD on Intel, or vice versa, for some
common perfmon_events (such as PERF_COUNT_HW_CPU_CYCLES). But as soon as
guest VMs access raw counters (see PERF_TYPE_RAW), we can't emulate them
anymore.

> 
>> V1:
>>   * Adopt the file layout suggested by Radim Krčmář
> 
> (I'll still advocate for more separation, sorry.)
> 
>>   * Link arch module with its specific PMU file
>>
>> RFC:
>>   * Initial version for RFC
>>
>> Wei Huang (4):
>>   KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch
>>   KVM: x86/vPMU: Convert pmu.c code into Intel specific code
>>   KVM: x86/vPMU: Implement AMD PMU support for KVM
>>   KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs
>>
>>  arch/x86/include/asm/kvm_host.h |  52 ++--
>>  arch/x86/kvm/Makefile           |   6 +-
>>  arch/x86/kvm/cpuid.c            |   2 +-
>>  arch/x86/kvm/lapic.c            |   1 +
>>  arch/x86/kvm/pmu.c              | 576 ---------------------------------------
>>  arch/x86/kvm/pmu_amd.c          | 390 ++++++++++++++++++++++++++
>>  arch/x86/kvm/pmu_intel.c        | 586 ++++++++++++++++++++++++++++++++++++++++
>>  arch/x86/kvm/svm.c              |   7 +
>>  arch/x86/kvm/vmx.c              |   7 +
>>  arch/x86/kvm/x86.c              |  74 ++---
>>  10 files changed, 1070 insertions(+), 631 deletions(-)
>>  delete mode 100644 arch/x86/kvm/pmu.c
>>  create mode 100644 arch/x86/kvm/pmu_amd.c
>>  create mode 100644 arch/x86/kvm/pmu_intel.c
>>
>> -- 
>> 1.8.3.1
>>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V1 2/4] KVM: x86/vPMU: Convert pmu.c code into Intel specific code
  2014-10-31 16:05 ` [PATCH V1 2/4] KVM: x86/vPMU: Convert pmu.c code into Intel specific code Wei Huang
@ 2014-11-03 18:36   ` Radim Krčmář
  2014-11-04 18:38     ` Wei Huang
  0 siblings, 1 reply; 13+ messages in thread
From: Radim Krčmář @ 2014-11-03 18:36 UTC (permalink / raw)
  To: Wei Huang; +Cc: kvm, pbonzini, gleb

2014-10-31 12:05-0400, Wei Huang:
> This patch converts existing pmu.c into Intel specific code and hooks
> up with the PMU interface using the following steps:
> 
> - Convert pmu.c to pmu_intel.c; All public PMU functions are renamed
>   and hooked up with the newly defined intel_pmu_ops.
> - Create a corresponding pmu_amd.c file with empty functions for AMD
>   arch.
> - The PMU function pointer, kvm_pmu_ops, is initialized by calling
>   kvm_x86_ops->get_pmu_ops().
> - To reduce the code size, Intel and AMD modules are now generated
>   from their corrponding arch and PMU files; In the meanwhile, due
>   to this arrangement several, functions are exposed as public ones
>   to allow calling from PMU code.

(Patch would have been easier to review as two patches, where the first
 one just renames pmu to pmu_intel and the second one does the split.)

> Signed-off-by: Wei Huang <wei@redhat.com>
> ---

---
The rest is an idea for consideration ...
Please consider everything from now to be in parentheses
= ignore you are not enjoying shuffling code around.

> +
> +static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
> +		u8 unit_mask)

The prototype could be exteded with
  struct kvm_event_hw_type_mapping, size_t nr_events
and reused for AMD as well.

> +{
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
> +		if (intel_arch_events[i].eventsel == event_select
> +				&& intel_arch_events[i].unit_mask == unit_mask
> +				&& (pmu->available_event_types & (1 << i)))

pmu->available_event_types would be -1 on AMD.

> +			break;
> +
> +	if (i == ARRAY_SIZE(intel_arch_events))
> +		return PERF_COUNT_HW_MAX;
> +
> +	return intel_arch_events[i].event_type;
> +}

> +static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
> +		unsigned config, bool exclude_user, bool exclude_kernel,
> +		bool intr, bool in_tx, bool in_tx_cp)

This function could drop 'bool in_tx' and 'bool in_tx_cp', because it
already accepts config, so these flags can already be included there.
It has only one caller that uses them anyway.

> +{
> +	struct perf_event *event;
> +	struct perf_event_attr attr = {
> +		.type = type,
> +		.size = sizeof(attr),
> +		.pinned = true,
> +		.exclude_idle = true,
> +		.exclude_host = 1,
> +		.exclude_user = exclude_user,
> +		.exclude_kernel = exclude_kernel,
> +		.config = config,
> +	};
> +	if (in_tx)
> +		attr.config |= HSW_IN_TX;
> +	if (in_tx_cp)
> +		attr.config |= HSW_IN_TX_CHECKPOINTED;

And after dropping this, it is identical to AMD.

> +
> +	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
> +
> +	event = perf_event_create_kernel_counter(&attr, -1, current,
> +						 intr ? kvm_perf_overflow_intr :
> +						 kvm_perf_overflow, pmc);
> +	if (IS_ERR(event)) {
> +		printk_once("kvm: pmu event creation failed %ld\n",
> +				PTR_ERR(event));
> +		return;
> +	}
> +
> +	pmc->perf_event = event;
> +	clear_bit(pmc->idx, (unsigned long*)&pmc->vcpu->arch.pmu.reprogram_pmi);
> +}
> +
> +static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
> +{

Because the two functions that this one uses have been merged, we could
propagate the changes and reuse this one with AMD as well.

> +	unsigned config, type = PERF_TYPE_RAW;
> +	u8 event_select, unit_mask;
> +
> +	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
> +		printk_once("kvm pmu: pin control bit is ignored\n");
> +
> +	pmc->eventsel = eventsel;
> +
> +	stop_counter(pmc);
> +
> +	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
> +		return;
> +
> +	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
> +	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
> +
> +	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
> +				ARCH_PERFMON_EVENTSEL_INV |
> +				ARCH_PERFMON_EVENTSEL_CMASK |
> +				HSW_IN_TX |
> +				HSW_IN_TX_CHECKPOINTED))) {
> +		config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
> +				unit_mask);
> +		if (config != PERF_COUNT_HW_MAX)
> +			type = PERF_TYPE_HARDWARE;
> +	}
> +
> +	if (type == PERF_TYPE_RAW)
> +		config = eventsel & X86_RAW_EVENT_MASK;
> +
> +	reprogram_counter(pmc, type, config,
> +			!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
> +			!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
> +			eventsel & ARCH_PERFMON_EVENTSEL_INT,
> +			(eventsel & HSW_IN_TX),
> +			(eventsel & HSW_IN_TX_CHECKPOINTED));
> +}

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V1 0/4] KVM vPMU support for x86
  2014-11-03 18:23   ` Wei Huang
@ 2014-11-03 18:39     ` Radim Krčmář
  2014-11-03 18:47       ` Wei Huang
  0 siblings, 1 reply; 13+ messages in thread
From: Radim Krčmář @ 2014-11-03 18:39 UTC (permalink / raw)
  To: Wei Huang; +Cc: kvm, pbonzini, gleb

2014-11-03 12:23-0600, Wei Huang:
> 
> 
> On 11/03/2014 11:56 AM, Radim Krčmář wrote:
> > 2014-10-31 12:05-0400, Wei Huang:
> >> Currently KVM only supports vPMU for Intel platforms. This patch set 
> >> enable vPMU support for AMD platform by creating a common PMU
> >> interface for x86. The PMU calls from guest VMs are dispatched
> >> to corresponding functions defined in arch specific files.
> > 
> > The functionality looks good, so I just want verify the basic design:
> > why don't we emulate AMD PMU on Intel, and vice versa?
> > (Underlying PERF_COUNTs are identical in both.)
> 
> Thanks. The underlining perf counters can be very different between AMD
> and Intel. I think we can emulate AMD on Intel, or vice versa, for some
> common perfmon_events (such as PERF_COUNT_HW_CPU_CYCLES). But as soon as
> guest VMs access raw counters (see PERF_TYPE_RAW), we can't emulate them
> anymore.

Thanks, I guess raw counters are used more than I thought, so code
complexity would overshadow the gain of having at least something.

And then, there is the always perfect, "who cares" :)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V1 0/4] KVM vPMU support for x86
  2014-11-03 18:39     ` Radim Krčmář
@ 2014-11-03 18:47       ` Wei Huang
  0 siblings, 0 replies; 13+ messages in thread
From: Wei Huang @ 2014-11-03 18:47 UTC (permalink / raw)
  To: Radim Krčmář; +Cc: kvm, pbonzini, gleb



On 11/03/2014 12:39 PM, Radim Krčmář wrote:
> 2014-11-03 12:23-0600, Wei Huang:
>>
>>
>> On 11/03/2014 11:56 AM, Radim Krčmář wrote:
>>> 2014-10-31 12:05-0400, Wei Huang:
>>>> Currently KVM only supports vPMU for Intel platforms. This patch set 
>>>> enable vPMU support for AMD platform by creating a common PMU
>>>> interface for x86. The PMU calls from guest VMs are dispatched
>>>> to corresponding functions defined in arch specific files.
>>>
>>> The functionality looks good, so I just want verify the basic design:
>>> why don't we emulate AMD PMU on Intel, and vice versa?
>>> (Underlying PERF_COUNTs are identical in both.)
>>
>> Thanks. The underlining perf counters can be very different between AMD
>> and Intel. I think we can emulate AMD on Intel, or vice versa, for some
>> common perfmon_events (such as PERF_COUNT_HW_CPU_CYCLES). But as soon as
>> guest VMs access raw counters (see PERF_TYPE_RAW), we can't emulate them
>> anymore.
> 
> Thanks, I guess raw counters are used more than I thought, so code
> complexity would overshadow the gain of having at least something.
To be honest I did try. But it became a big rat-hole as soon as I tried
to abstract them. This problem also applies to Intel CPUs between
generations, if perf counters are not arch_counters.

-Wei

> 
> And then, there is the always perfect, "who cares" :)


> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V1 3/4] KVM: x86/vPMU: Implement AMD PMU support for KVM
  2014-11-03 18:17   ` Radim Krčmář
@ 2014-11-04 18:20     ` Wei Huang
  0 siblings, 0 replies; 13+ messages in thread
From: Wei Huang @ 2014-11-04 18:20 UTC (permalink / raw)
  To: Radim Krčmář; +Cc: kvm, pbonzini, gleb

On 11/03/2014 12:17 PM, Radim Krčmář wrote:
> 2014-10-31 12:05-0400, Wei Huang:
>> This patch implemented vPMU for AMD platform. The design piggybacks
>> on the existing Intel structs (kvm_pmu and kvm_pmc), but only uses
>> the parts of generic counters. The kvm_pmu_ops interface is also
>> initialized in this patch.
>>
>> Signed-off-by: Wei Huang <wei@redhat.com>
>> ---
>>  arch/x86/kvm/pmu_amd.c | 332 ++++++++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 318 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
>> index 6d6e1c3..19cfe26 100644
>> --- a/arch/x86/kvm/pmu_amd.c
>> +++ b/arch/x86/kvm/pmu_amd.c
>> @@ -16,58 +16,362 @@
>>  #include <linux/types.h>
>>  #include <linux/kvm_host.h>
>>  #include <linux/perf_event.h>
>> -#include <asm/perf_event.h>
>>  #include "x86.h"
>>  #include "cpuid.h"
>>  #include "lapic.h"
>>  
>> -void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)
>> +/* duplicated from amd_perfmon_event_map, K7 and above should work */
>> +static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
>> +	[0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
>> +	[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
>> +	[2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES },
>> +	[3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES },
> 
>> +	[4] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
>> +	[5] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
> 
> amd_perfmon_event_map has 0xc2 and 0xc3.
Sorry, will fix.
> 
>> +	[6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
>> +	[7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
>> +};
>> +
>> +static inline bool pmc_is_gp(struct kvm_pmc *pmc)
>>  {
>> +	return pmc->type == KVM_PMC_GP;
>>  }
> 
> What is the benefit of having the same implementation in both files?
> 

No benefits. In fact I can remove it from AMD code because it is always
true for AMD. Will fix.

> I'd prefer to have it in pmu.h and I'll try to note all functions that
> look identical.  As always, you can ignore anything in parentheses,
> there are only few real issues with this patch.
> 
>>  
>> -int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
>> +static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
>> +					 u32 base)
> 
> (pmu.h)

I will try to merge common ones into pmu.h in next re-spin. This applies
to the rest (pmu.h and pmu.c) in your replies.

> 
>>  {
>> -	return 1;
>> +	if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
>> +		return &pmu->gp_counters[msr - base];
>> +
>> +	return NULL;
>>  }
>>  
>> -int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
>> +static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
> 
> (pmu.h)
> 
>>  {
>> -	return 1;
>> +	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
>> +
>> +	return pmu->counter_bitmask[pmc->type];
>>  }
>>  
>> -int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> +static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
>>  {
>> -	return 1;
>> +	return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + idx, MSR_K7_EVNTSEL0);
>>  }
>>  
>> -int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
>> +void amd_deliver_pmi(struct kvm_vcpu *vcpu)
> 
> static.
> 
> (pmu.c, rename to deliver_pmi and reuse for intel.)
> 

Same here. I will try to merge them as much as I can.

>>  {
>> -	return 1;
>> +	if (vcpu->arch.apic)
>> +		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
>>  }
>>  
>> -bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
>> +static void trigger_pmi(struct irq_work *irq_work)
> 
> (pmu.c)
> 
>>  {
>> -	return 0;
>> +	struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
>> +			irq_work);
>> +	struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
>> +			arch.pmu);
>> +
>> +	amd_deliver_pmi(vcpu);
>>  }
>>  
>> -void amd_deliver_pmi(struct kvm_vcpu *vcpu)
>> +static u64 read_pmc(struct kvm_pmc *pmc)
> 
> (pmu.c)
> 
>> +{
>> +	u64 counter, enabled, running, result, delta, prev;
>> +
>> +	counter = pmc->counter;
>> +	prev = pmc->counter;
>> +
>> +	if (pmc->perf_event) {
>> +		delta = perf_event_read_value(pmc->perf_event,
>> +					      &enabled, &running);
>> +		counter += delta;
>> +	}
>> +
>> +	result = counter & pmc_bitmask(pmc);
>> +
>> +	return result;
>> +}
> 
> No.  The one intel uses is better.
> (This one looks like a relic from experimenting.)

Yes, I inserted some debug message using "result" variable. Will fix.

> 
>> +
>> +static void stop_counter(struct kvm_pmc *pmc)
> 
> (pmu.c)
> 
>> +{
>> +	if (pmc->perf_event) {
>> +		pmc->counter = read_pmc(pmc);
>> +		perf_event_release_kernel(pmc->perf_event);
>> +		pmc->perf_event = NULL;
>> +	}
>> +}
>> +
>> +static unsigned find_hw_type_event(struct kvm_pmu *pmu, u8 event_select,
>> +				u8 unit_mask)
>> +{
> 
> (Intel calls this find_arch_event.)

I don't quite agree with existing naming, find_arch_event, in
pmu_intel.c. This function isn't directly related to arch event. It is
for mapping from perf_counters to linux_perf_event. But I only changed
the version pmu_amd.c though.

> 
>> +	int i;
>> +
>> +	for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
>> +		if (amd_event_mapping[i].eventsel == event_select
>> +		    && amd_event_mapping[i].unit_mask == unit_mask)
> 
> (And it could be less extensible, but shorter:
>   			return amd_event_mapping[i].event_type;
>   	}
>   	return PERF_COUNT_HW_MAX;
>   }
> )

Will fix

> 
>> +			break;
>> +
>> +	if (i == ARRAY_SIZE(amd_event_mapping))
>> +		return PERF_COUNT_HW_MAX;
>> +
>> +	return amd_event_mapping[i].event_type;
>> +}
>> +
>> +static void kvm_perf_overflow_intr(struct perf_event *perf_event,
>> +		struct perf_sample_data *data, struct pt_regs *regs)
>> +{
> 
> (pmu.c)
> 
>> +	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
>> +	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
>> +
>> +	if (!test_and_set_bit(pmc->idx,
>> +			      (unsigned long *)&pmu->reprogram_pmi)) {
> 
> (The useless set_bit for intel is not that bad to keep them separate;
>  please mind my radicality when it comes to abstracting.)
> 
>> +		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
>> +
>> +		if (!kvm_is_in_guest())
>> +			irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
>> +		else
>> +			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
>> +	}
>> +}
>> +
>> +static void kvm_perf_overflow(struct perf_event *perf_event,
>> +			      struct perf_sample_data *data,
>> +			      struct pt_regs *regs)
>> +{
> 
> (pmu.c, ditto.)
> 
>> +	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
>> +	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
>> +
>> +	if (!test_and_set_bit(pmc->idx,
>> +			      (unsigned long *)&pmu->reprogram_pmi)) {
>> +		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
>> +	}
>> +}
>> +
>> +static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
>> +		unsigned config, bool exclude_user, bool exclude_kernel,
>> +		bool intr)
> 
> (Could be merged in my world, I'll comment on previous patch.)

Will try.

> 
>> +{
>> +	struct perf_event *event;
>> +	struct perf_event_attr attr = {
>> +		.type = type,
>> +		.size = sizeof(attr),
>> +		.pinned = true,
>> +		.exclude_idle = true,
>> +		.exclude_host = 1,
>> +		.exclude_user = exclude_user,
>> +		.exclude_kernel = exclude_kernel,
>> +		.config = config,
>> +	};
>> +
>> +	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
>> +
>> +	event = perf_event_create_kernel_counter(&attr, -1, current,
>> +						 intr?kvm_perf_overflow_intr :
>> +						 kvm_perf_overflow, pmc);
>> +	if (IS_ERR(event)) {
>> +		printk_once("kvm: pmu event creation failed %ld\n",
>> +			    PTR_ERR(event));
>> +		return;
>> +	}
>> +
>> +	pmc->perf_event = event;
>> +	clear_bit(pmc->idx, (unsigned long *)&pmc->vcpu->arch.pmu.reprogram_pmi);
>> +}
>> +
>> +
>> +static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>> +{
> 
> (Ditto.)
> 
>> +	unsigned config, type = PERF_TYPE_RAW;
>> +	u8 event_select, unit_mask;
>> +
>> +	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
>> +		printk_once("kvm pmu: pin control bit is ignored\n");
>> +
>> +	pmc->eventsel = eventsel;
>> +
>> +	stop_counter(pmc);
>> +
>> +	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE))
>> +		return;
>> +
>> +	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
>> +	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
>> +
>> +	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
>> +			  ARCH_PERFMON_EVENTSEL_INV |
>> +			  ARCH_PERFMON_EVENTSEL_CMASK |
>> +			  HSW_IN_TX |
>> +			  HSW_IN_TX_CHECKPOINTED))) {
>> +		config = find_hw_type_event(&pmc->vcpu->arch.pmu, event_select,
>> +					 unit_mask);
>> +		if (config != PERF_COUNT_HW_MAX)
>> +			type = PERF_TYPE_HARDWARE;
>> +	}
>> +
>> +	if (type == PERF_TYPE_RAW)
>> +		config = eventsel & X86_RAW_EVENT_MASK;
>> +
>> +	reprogram_counter(pmc, type, config,
>> +			  !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
>> +			  !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
>> +			  eventsel & ARCH_PERFMON_EVENTSEL_INT);
>> +}
>> +
>> +static void reprogram_idx(struct kvm_pmu *pmu, int idx)
>>  {
>> +	struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);
>> +
>> +	if (!pmc || !pmc_is_gp(pmc))
>> +		return;
>> +	reprogram_gp_counter(pmc, pmc->eventsel);
>>  }
>>  
>>  void amd_handle_pmu_event(struct kvm_vcpu *vcpu)
>>  {
> 
> static.
> 
> (pmu.c)
> 
>> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +	u64 bitmask;
>> +	int bit;
>> +
>> +	bitmask = pmu->reprogram_pmi;
>> +
>> +	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
>> +		struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);
>> +
>> +		if (unlikely(!pmc || !pmc->perf_event)) {
>> +			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
>> +			continue;
>> +		}
>> +
>> +		reprogram_idx(pmu, bit);
>> +	}
>>  }
>>  
>>  void amd_pmu_reset(struct kvm_vcpu *vcpu)
> 
> static.
> 
>>  {
>> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +	int i;
>> +
>> +	irq_work_sync(&pmu->irq_work);
>> +
>> +	for (i = 0; i < AMD64_NUM_COUNTERS; i++) {
>> +		struct kvm_pmc *pmc = &pmu->gp_counters[i];
>> +
>> +		stop_counter(pmc);
>> +		pmc->counter = pmc->eventsel = 0;
>> +	}
>> +}
>> +
>> +void amd_pmu_destroy(struct kvm_vcpu *vcpu)
> 
> static.
> 
>> +{
>> +	amd_pmu_reset(vcpu);
>> +}
>> +
>> +int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
> 
> static.
> 
>> +{
>> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +
>> +	pmc &= ~(3u << 30);
> 
> (-1u >> 2.)
> 
>> +	return (pmc >= pmu->nr_arch_gp_counters);
>> +}
>> +
>> +int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
> 
> static.
> 
>> +{
>> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +	struct kvm_pmc *counters;
>> +	u64 ctr;
>> +
>> +	pmc &= ~(3u << 30);
>> +	if (pmc >= pmu->nr_arch_gp_counters)
>> +		return 1;
>> +
>> +	counters = pmu->gp_counters;
>> +	ctr = read_pmc(&counters[pmc]);
>> +	*data = ctr;
>> +
>> +	return 0;
>> +}
>> +
>> +int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> 
> static.
> 
>> +{
>> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +	struct kvm_pmc *pmc;
>> +	u32 index = msr_info->index;
>> +	u64 data = msr_info->data;
>> +
>> +	if ((pmc = get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) {
>> +		if (!msr_info->host_initiated)
>> +			data = (s64)data;
>> +		pmc->counter += data - read_pmc(pmc);
>> +		return 0;
>> +	} else if ((pmc = get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) {
>> +		if (data == pmc->eventsel)
>> +			return 0;
>> +		if (!(data & pmu->reserved_bits)) {
>> +			reprogram_gp_counter(pmc, data);
>> +			return 0;
>> +		}
>> +	}
>> +
>> +	return 1;
>> +}
>> +
>> +int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
> 
> static.
> 
>> +{
>> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +	struct kvm_pmc *pmc;
>> +
>> +	if ((pmc = get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) {
>> +		*data = read_pmc(pmc);
>> +		return 0;
>> +	} else if ((pmc = get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) {
>> +		*data = pmc->eventsel;
>> +		return 0;
>> +	}
>> +
>> +	return 1;
>> +}
>> +
>> +void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)
> 
> static.
> 
>> +{
>> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +
>> +	pmu->nr_arch_gp_counters = 0;
>> +	pmu->nr_arch_fixed_counters = 0;
>> +	pmu->counter_bitmask[KVM_PMC_GP] = 0;
>> +	pmu->version = 0;
>> +	pmu->reserved_bits = 0xffffffff00200000ull;
>> +
> 
>> +	/* configuration */
>> +	pmu->nr_arch_gp_counters = 4;
> 
> AMD64_NUM_COUNTERS?

Yes, will fix.

> 
>> +	pmu->nr_arch_fixed_counters = 0;
>> +	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
> 
> amd_pmu_cpuid_update doesn't depend on cpuid at all.
> (I'd keep this function empty.)
> 
>>  }
>>  
>>  void amd_pmu_init(struct kvm_vcpu *vcpu)
> 
> static.
> 
>>  {
>> +	int i;
>> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +
>> +	memset(pmu, 0, sizeof(*pmu));
>> +	for (i = 0; i < AMD64_NUM_COUNTERS ; i++) {
>> +		pmu->gp_counters[i].type = KVM_PMC_GP;
>> +		pmu->gp_counters[i].vcpu = vcpu;
>> +		pmu->gp_counters[i].idx = i;
>> +	}
>> +
>> +	init_irq_work(&pmu->irq_work, trigger_pmi);
>> +	amd_pmu_cpuid_update(vcpu);
>>  }
>>  
>> -void amd_pmu_destroy(struct kvm_vcpu *vcpu)
>> +bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
> 
> static.
> 
>>  {
>> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +	int ret;
>> +
>> +	ret = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) ||
>> +		get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
>> +
>> +	return ret;
>>  }
>>  
>>  struct kvm_pmu_ops amd_pmu_ops = {
>> -- 
>> 1.8.3.1
>>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V1 2/4] KVM: x86/vPMU: Convert pmu.c code into Intel specific code
  2014-11-03 18:36   ` Radim Krčmář
@ 2014-11-04 18:38     ` Wei Huang
  0 siblings, 0 replies; 13+ messages in thread
From: Wei Huang @ 2014-11-04 18:38 UTC (permalink / raw)
  To: Radim Krčmář; +Cc: kvm, pbonzini, gleb



On 11/03/2014 12:36 PM, Radim Krčmář wrote:
> 2014-10-31 12:05-0400, Wei Huang:
>> This patch converts existing pmu.c into Intel specific code and hooks
>> up with the PMU interface using the following steps:
>>
>> - Convert pmu.c to pmu_intel.c; All public PMU functions are renamed
>>   and hooked up with the newly defined intel_pmu_ops.
>> - Create a corresponding pmu_amd.c file with empty functions for AMD
>>   arch.
>> - The PMU function pointer, kvm_pmu_ops, is initialized by calling
>>   kvm_x86_ops->get_pmu_ops().
>> - To reduce the code size, Intel and AMD modules are now generated
>>   from their corrponding arch and PMU files; In the meanwhile, due
>>   to this arrangement several, functions are exposed as public ones
>>   to allow calling from PMU code.
> 
> (Patch would have been easier to review as two patches, where the first
>  one just renames pmu to pmu_intel and the second one does the split.)

Will do in next spin.

> 
>> Signed-off-by: Wei Huang <wei@redhat.com>
>> ---
> 
> ---
> The rest is an idea for consideration ...
> Please consider everything from now to be in parentheses
> = ignore you are not enjoying shuffling code around.
> 
>> +
>> +static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
>> +		u8 unit_mask)
> 
> The prototype could be exteded with
>   struct kvm_event_hw_type_mapping, size_t nr_events
> and reused for AMD as well.
> 
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++)
>> +		if (intel_arch_events[i].eventsel == event_select
>> +				&& intel_arch_events[i].unit_mask == unit_mask
>> +				&& (pmu->available_event_types & (1 << i)))
> 
> pmu->available_event_types would be -1 on AMD.
> 
>> +			break;
>> +
>> +	if (i == ARRAY_SIZE(intel_arch_events))
>> +		return PERF_COUNT_HW_MAX;
>> +
>> +	return intel_arch_events[i].event_type;
>> +}
> 
>> +static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
>> +		unsigned config, bool exclude_user, bool exclude_kernel,
>> +		bool intr, bool in_tx, bool in_tx_cp)
> 
> This function could drop 'bool in_tx' and 'bool in_tx_cp', because it
> already accepts config, so these flags can already be included there.
> It has only one caller that uses them anyway.
> 
>> +{
>> +	struct perf_event *event;
>> +	struct perf_event_attr attr = {
>> +		.type = type,
>> +		.size = sizeof(attr),
>> +		.pinned = true,
>> +		.exclude_idle = true,
>> +		.exclude_host = 1,
>> +		.exclude_user = exclude_user,
>> +		.exclude_kernel = exclude_kernel,
>> +		.config = config,
>> +	};
>> +	if (in_tx)
>> +		attr.config |= HSW_IN_TX;
>> +	if (in_tx_cp)
>> +		attr.config |= HSW_IN_TX_CHECKPOINTED;
> 
> And after dropping this, it is identical to AMD.

Thanks. I will consolidate the ideas above into next spin.

> 
>> +
>> +	attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
>> +
>> +	event = perf_event_create_kernel_counter(&attr, -1, current,
>> +						 intr ? kvm_perf_overflow_intr :
>> +						 kvm_perf_overflow, pmc);
>> +	if (IS_ERR(event)) {
>> +		printk_once("kvm: pmu event creation failed %ld\n",
>> +				PTR_ERR(event));
>> +		return;
>> +	}
>> +
>> +	pmc->perf_event = event;
>> +	clear_bit(pmc->idx, (unsigned long*)&pmc->vcpu->arch.pmu.reprogram_pmi);
>> +}
>> +
>> +static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>> +{
> 
> Because the two functions that this one uses have been merged, we could
> propagate the changes and reuse this one with AMD as well.
> 
>> +	unsigned config, type = PERF_TYPE_RAW;
>> +	u8 event_select, unit_mask;
>> +
>> +	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
>> +		printk_once("kvm pmu: pin control bit is ignored\n");
>> +
>> +	pmc->eventsel = eventsel;
>> +
>> +	stop_counter(pmc);
>> +
>> +	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
>> +		return;
>> +
>> +	event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
>> +	unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
>> +
>> +	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
>> +				ARCH_PERFMON_EVENTSEL_INV |
>> +				ARCH_PERFMON_EVENTSEL_CMASK |
>> +				HSW_IN_TX |
>> +				HSW_IN_TX_CHECKPOINTED))) {
>> +		config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
>> +				unit_mask);
>> +		if (config != PERF_COUNT_HW_MAX)
>> +			type = PERF_TYPE_HARDWARE;
>> +	}
>> +
>> +	if (type == PERF_TYPE_RAW)
>> +		config = eventsel & X86_RAW_EVENT_MASK;
>> +
>> +	reprogram_counter(pmc, type, config,
>> +			!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
>> +			!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
>> +			eventsel & ARCH_PERFMON_EVENTSEL_INT,
>> +			(eventsel & HSW_IN_TX),
>> +			(eventsel & HSW_IN_TX_CHECKPOINTED));
>> +}

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2014-11-04 18:38 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-10-31 16:05 [PATCH V1 0/4] KVM vPMU support for x86 Wei Huang
2014-10-31 16:05 ` [PATCH V1 1/4] KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch Wei Huang
2014-10-31 16:05 ` [PATCH V1 2/4] KVM: x86/vPMU: Convert pmu.c code into Intel specific code Wei Huang
2014-11-03 18:36   ` Radim Krčmář
2014-11-04 18:38     ` Wei Huang
2014-10-31 16:05 ` [PATCH V1 3/4] KVM: x86/vPMU: Implement AMD PMU support for KVM Wei Huang
2014-11-03 18:17   ` Radim Krčmář
2014-11-04 18:20     ` Wei Huang
2014-10-31 16:05 ` [PATCH V1 4/4] KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs Wei Huang
2014-11-03 17:56 ` [PATCH V1 0/4] KVM vPMU support for x86 Radim Krčmář
2014-11-03 18:23   ` Wei Huang
2014-11-03 18:39     ` Radim Krčmář
2014-11-03 18:47       ` Wei Huang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox