From mboxrd@z Thu Jan 1 00:00:00 1970 From: Boris Ostrovsky Subject: [PATCH v5 RESEND 17/17] x86/VPMU: Move VPMU files up from hvm/ directory Date: Wed, 23 Apr 2014 08:50:38 -0400 Message-ID: <1398257438-4994-18-git-send-email-boris.ostrovsky@oracle.com> References: <1398257438-4994-1-git-send-email-boris.ostrovsky@oracle.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1398257438-4994-1-git-send-email-boris.ostrovsky@oracle.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: kevin.tian@intel.com Cc: keir@xen.org, jun.nakajima@intel.com, andrew.cooper3@citrix.com, eddie.dong@intel.com, donald.d.dugger@intel.com, xen-devel@lists.xen.org, dietmar.hahn@ts.fujitsu.com, JBeulich@suse.com, boris.ostrovsky@oracle.com, suravee.suthikulpanit@amd.com List-Id: xen-devel@lists.xenproject.org Since PMU is now not HVM specific we can move VPMU-related files up from arch/x86/hvm/ directory. Specifically: arch/x86/hvm/vpmu.c -> arch/x86/vpmu.c arch/x86/hvm/svm/vpmu.c -> arch/x86/vpmu_amd.c arch/x86/hvm/vmx/vpmu_core2.c -> arch/x86/vpmu_intel.c include/asm-x86/hvm/vpmu.h -> include/asm-x86/vpmu.h Signed-off-by: Boris Ostrovsky --- xen/arch/x86/Makefile | 1 + xen/arch/x86/hvm/Makefile | 1 - xen/arch/x86/hvm/svm/Makefile | 1 - xen/arch/x86/hvm/svm/vpmu.c | 509 ------------------ xen/arch/x86/hvm/vlapic.c | 2 +- xen/arch/x86/hvm/vmx/Makefile | 1 - xen/arch/x86/hvm/vmx/vpmu_core2.c | 940 ---------------------------------- xen/arch/x86/hvm/vpmu.c | 720 -------------------------- xen/arch/x86/oprofile/op_model_ppro.c | 2 +- xen/arch/x86/traps.c | 2 +- xen/arch/x86/vpmu.c | 720 ++++++++++++++++++++++++++ xen/arch/x86/vpmu_amd.c | 509 ++++++++++++++++++ xen/arch/x86/vpmu_intel.c | 940 ++++++++++++++++++++++++++++++++++ xen/include/asm-x86/hvm/vmx/vmcs.h | 2 +- xen/include/asm-x86/hvm/vpmu.h | 99 ---- xen/include/asm-x86/vpmu.h | 99 ++++ 16 files changed, 2273 insertions(+), 2275 deletions(-) delete mode 100644 xen/arch/x86/hvm/svm/vpmu.c delete mode 100644 xen/arch/x86/hvm/vmx/vpmu_core2.c delete mode 100644 xen/arch/x86/hvm/vpmu.c create mode 100644 xen/arch/x86/vpmu.c create mode 100644 xen/arch/x86/vpmu_amd.c create mode 100644 xen/arch/x86/vpmu_intel.c delete mode 100644 xen/include/asm-x86/hvm/vpmu.h create mode 100644 xen/include/asm-x86/vpmu.h diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile index d502bdf..cf85dda 100644 --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile @@ -58,6 +58,7 @@ obj-y += crash.o obj-y += tboot.o obj-y += hpet.o obj-y += xstate.o +obj-y += vpmu.o vpmu_amd.o vpmu_intel.o obj-$(crash_debug) += gdbstub.o diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile index eea5555..742b83b 100644 --- a/xen/arch/x86/hvm/Makefile +++ b/xen/arch/x86/hvm/Makefile @@ -22,4 +22,3 @@ obj-y += vlapic.o obj-y += vmsi.o obj-y += vpic.o obj-y += vpt.o -obj-y += vpmu.o \ No newline at end of file diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile index a10a55e..760d295 100644 --- a/xen/arch/x86/hvm/svm/Makefile +++ b/xen/arch/x86/hvm/svm/Makefile @@ -6,4 +6,3 @@ obj-y += nestedsvm.o obj-y += svm.o obj-y += svmdebug.o obj-y += vmcb.o -obj-y += vpmu.o diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c deleted file mode 100644 index 0e5dac4..0000000 --- a/xen/arch/x86/hvm/svm/vpmu.c +++ /dev/null @@ -1,509 +0,0 @@ -/* - * vpmu.c: PMU virtualization for HVM domain. - * - * Copyright (c) 2010, Advanced Micro Devices, Inc. - * Parts of this code are Copyright (c) 2007, Intel Corporation - * - * Author: Wei Wang - * Tested by: Suravee Suthikulpanit - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MSR_F10H_EVNTSEL_GO_SHIFT 40 -#define MSR_F10H_EVNTSEL_EN_SHIFT 22 -#define MSR_F10H_COUNTER_LENGTH 48 - -#define is_guest_mode(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) -#define is_pmu_enabled(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_EN_SHIFT)) -#define set_guest_mode(msr) (msr |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) -#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1)))) - -static unsigned int __read_mostly num_counters; -static const u32 __read_mostly *counters; -static const u32 __read_mostly *ctrls; -static bool_t __read_mostly k7_counters_mirrored; - -#define F10H_NUM_COUNTERS 4 -#define F15H_NUM_COUNTERS 6 -#define AMD_MAX_COUNTERS 6 - -/* PMU Counter MSRs. */ -static const u32 AMD_F10H_COUNTERS[] = { - MSR_K7_PERFCTR0, - MSR_K7_PERFCTR1, - MSR_K7_PERFCTR2, - MSR_K7_PERFCTR3 -}; - -/* PMU Control MSRs. */ -static const u32 AMD_F10H_CTRLS[] = { - MSR_K7_EVNTSEL0, - MSR_K7_EVNTSEL1, - MSR_K7_EVNTSEL2, - MSR_K7_EVNTSEL3 -}; - -static const u32 AMD_F15H_COUNTERS[] = { - MSR_AMD_FAM15H_PERFCTR0, - MSR_AMD_FAM15H_PERFCTR1, - MSR_AMD_FAM15H_PERFCTR2, - MSR_AMD_FAM15H_PERFCTR3, - MSR_AMD_FAM15H_PERFCTR4, - MSR_AMD_FAM15H_PERFCTR5 -}; - -static const u32 AMD_F15H_CTRLS[] = { - MSR_AMD_FAM15H_EVNTSEL0, - MSR_AMD_FAM15H_EVNTSEL1, - MSR_AMD_FAM15H_EVNTSEL2, - MSR_AMD_FAM15H_EVNTSEL3, - MSR_AMD_FAM15H_EVNTSEL4, - MSR_AMD_FAM15H_EVNTSEL5 -}; - -static inline int get_pmu_reg_type(u32 addr) -{ - if ( (addr >= MSR_K7_EVNTSEL0) && (addr <= MSR_K7_EVNTSEL3) ) - return MSR_TYPE_CTRL; - - if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) ) - return MSR_TYPE_COUNTER; - - if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) && - (addr <= MSR_AMD_FAM15H_PERFCTR5 ) ) - { - if (addr & 1) - return MSR_TYPE_COUNTER; - else - return MSR_TYPE_CTRL; - } - - /* unsupported registers */ - return -1; -} - -static inline u32 get_fam15h_addr(u32 addr) -{ - switch ( addr ) - { - case MSR_K7_PERFCTR0: - return MSR_AMD_FAM15H_PERFCTR0; - case MSR_K7_PERFCTR1: - return MSR_AMD_FAM15H_PERFCTR1; - case MSR_K7_PERFCTR2: - return MSR_AMD_FAM15H_PERFCTR2; - case MSR_K7_PERFCTR3: - return MSR_AMD_FAM15H_PERFCTR3; - case MSR_K7_EVNTSEL0: - return MSR_AMD_FAM15H_EVNTSEL0; - case MSR_K7_EVNTSEL1: - return MSR_AMD_FAM15H_EVNTSEL1; - case MSR_K7_EVNTSEL2: - return MSR_AMD_FAM15H_EVNTSEL2; - case MSR_K7_EVNTSEL3: - return MSR_AMD_FAM15H_EVNTSEL3; - default: - break; - } - - return addr; -} - -static void amd_vpmu_set_msr_bitmap(struct vcpu *v) -{ - unsigned int i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_amd_ctxt *ctxt = vpmu->context; - - for ( i = 0; i < num_counters; i++ ) - { - svm_intercept_msr(v, counters[i], MSR_INTERCEPT_NONE); - svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_WRITE); - } - - ctxt->msr_bitmap_set = 1; -} - -static void amd_vpmu_unset_msr_bitmap(struct vcpu *v) -{ - unsigned int i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_amd_ctxt *ctxt = vpmu->context; - - for ( i = 0; i < num_counters; i++ ) - { - svm_intercept_msr(v, counters[i], MSR_INTERCEPT_RW); - svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_RW); - } - - ctxt->msr_bitmap_set = 0; -} - -/* Must be NMI-safe */ -static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs) -{ - return 1; -} - -static inline void context_load(struct vcpu *v) -{ - unsigned int i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_amd_ctxt *ctxt = vpmu->context; - uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); - uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); - - for ( i = 0; i < num_counters; i++ ) - { - wrmsrl(counters[i], counter_regs[i]); - wrmsrl(ctrls[i], ctrl_regs[i]); - } -} - -/* Must be NMI-safe */ -static void amd_vpmu_load(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_amd_ctxt *ctxt = vpmu->context; - uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); - - vpmu_reset(vpmu, VPMU_FROZEN); - - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - { - unsigned int i; - - for ( i = 0; i < num_counters; i++ ) - wrmsrl(ctrls[i], ctrl_regs[i]); - - return; - } - - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - - context_load(v); -} - -static inline void context_save(struct vcpu *v) -{ - unsigned int i; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_amd_ctxt *ctxt = vpmu->context; - uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); - - /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */ - for ( i = 0; i < num_counters; i++ ) - rdmsrl(counters[i], counter_regs[i]); -} - -static int amd_vpmu_save(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_amd_ctxt *ctx = vpmu->context; - unsigned int i; - - /* - * Stop the counters. If we came here via vpmu_save_force (i.e. - * when VPMU_CONTEXT_SAVE is set) counters are already stopped. - */ - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) - { - vpmu_set(vpmu, VPMU_FROZEN); - - for ( i = 0; i < num_counters; i++ ) - wrmsrl(ctrls[i], 0); - - return 0; - } - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - return 0; - - context_save(v); - - if ( has_hvm_container_domain(v->domain) && - !vpmu_is_set(vpmu, VPMU_RUNNING) && ctx->msr_bitmap_set ) - amd_vpmu_unset_msr_bitmap(v); - - return 1; -} - -static void context_update(unsigned int msr, u64 msr_content) -{ - unsigned int i; - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_amd_ctxt *ctxt = vpmu->context; - uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); - uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); - - if ( k7_counters_mirrored && - ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) ) - { - msr = get_fam15h_addr(msr); - } - - for ( i = 0; i < num_counters; i++ ) - { - if ( msr == ctrls[i] ) - { - ctrl_regs[i] = msr_content; - return; - } - else if (msr == counters[i] ) - { - counter_regs[i] = msr_content; - return; - } - } -} - -static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) -{ - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - /* For all counters, enable guest only mode for HVM guest */ - if ( has_hvm_container_domain(v->domain) && (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && - !(is_guest_mode(msr_content)) ) - { - set_guest_mode(msr_content); - } - - /* check if the first counter is enabled */ - if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && - is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) ) - { - if ( !acquire_pmu_ownership(PMU_OWNER_HVM) ) - return 1; - vpmu_set(vpmu, VPMU_RUNNING); - - if ( has_hvm_container_domain(v->domain) && - !((struct xen_pmu_amd_ctxt *)vpmu->context)->msr_bitmap_set ) - amd_vpmu_set_msr_bitmap(v); - } - - /* stop saving & restore if guest stops first counter */ - if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && - (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) ) - { - vpmu_reset(vpmu, VPMU_RUNNING); - if ( has_hvm_container_domain(v->domain) && - ((struct xen_pmu_amd_ctxt *)vpmu->context)->msr_bitmap_set ) - amd_vpmu_unset_msr_bitmap(v); - release_pmu_ownship(PMU_OWNER_HVM); - } - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) - || vpmu_is_set(vpmu, VPMU_FROZEN) ) - { - context_load(v); - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - vpmu_reset(vpmu, VPMU_FROZEN); - } - - /* Update vpmu context immediately */ - context_update(msr, msr_content); - - /* Write to hw counters */ - wrmsrl(msr, msr_content); - return 1; -} - -static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) - || vpmu_is_set(vpmu, VPMU_FROZEN) ) - { - context_load(v); - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - vpmu_reset(vpmu, VPMU_FROZEN); - } - - rdmsrl(msr, *msr_content); - - return 1; -} - -static int amd_vpmu_initialise(struct vcpu *v) -{ - struct xen_pmu_amd_ctxt *ctxt; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - uint8_t family = current_cpu_data.x86; - - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return 0; - - if ( counters == NULL ) - { - switch ( family ) - { - case 0x15: - num_counters = F15H_NUM_COUNTERS; - counters = AMD_F15H_COUNTERS; - ctrls = AMD_F15H_CTRLS; - k7_counters_mirrored = 1; - break; - case 0x10: - case 0x12: - case 0x14: - case 0x16: - default: - num_counters = F10H_NUM_COUNTERS; - counters = AMD_F10H_COUNTERS; - ctrls = AMD_F10H_CTRLS; - k7_counters_mirrored = 0; - break; - } - } - - if ( has_hvm_container_domain(v->domain) ) - { - ctxt = xzalloc_bytes(sizeof(struct xen_pmu_amd_ctxt) + - sizeof(uint64_t) * AMD_MAX_COUNTERS + - sizeof(uint64_t) * AMD_MAX_COUNTERS); - if ( !ctxt ) - { - gdprintk(XENLOG_WARNING, "Insufficient memory for PMU, " - " PMU feature is unavailable on domain %d vcpu %d.\n", - v->vcpu_id, v->domain->domain_id); - return -ENOMEM; - } - } - else - ctxt = &v->arch.vpmu.xenpmu_data->pmu.c.amd; - - ctxt->counters = sizeof(struct xen_pmu_amd_ctxt); - ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * AMD_MAX_COUNTERS; - - vpmu->context = ctxt; - vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); - return 0; -} - -static void amd_vpmu_destroy(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - - if ( has_hvm_container_domain(v->domain) ) - { - if ( ((struct xen_pmu_amd_ctxt *)vpmu->context)->msr_bitmap_set ) - amd_vpmu_unset_msr_bitmap(v); - - xfree(vpmu->context); - } - - vpmu->context = NULL; - vpmu_clear(vpmu); - release_pmu_ownship(PMU_OWNER_HVM); -} - -/* VPMU part of the 'q' keyhandler */ -static void amd_vpmu_dump(const struct vcpu *v) -{ - const struct vpmu_struct *vpmu = vcpu_vpmu(v); - const struct xen_pmu_amd_ctxt *ctxt = vpmu->context; - uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); - uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); - unsigned int i; - - printk(" VPMU state: 0x%x ", vpmu->flags); - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - { - printk("\n"); - return; - } - - printk("("); - if ( vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED) ) - printk("PASSIVE_DOMAIN_ALLOCATED, "); - if ( vpmu_is_set(vpmu, VPMU_FROZEN) ) - printk("FROZEN, "); - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) - printk("SAVE, "); - if ( vpmu_is_set(vpmu, VPMU_RUNNING) ) - printk("RUNNING, "); - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - printk("LOADED, "); - printk("ALLOCATED)\n"); - - for ( i = 0; i < num_counters; i++ ) - { - uint64_t ctrl, cntr; - - rdmsrl(ctrls[i], ctrl); - rdmsrl(counters[i], cntr); - printk(" %#x: %#lx (%#lx in HW) %#x: %#lx (%#lx in HW)\n", - ctrls[i], ctrl_regs[i], ctrl, - counters[i], counter_regs[i], cntr); - } -} - -struct arch_vpmu_ops amd_vpmu_ops = { - .do_wrmsr = amd_vpmu_do_wrmsr, - .do_rdmsr = amd_vpmu_do_rdmsr, - .do_interrupt = amd_vpmu_do_interrupt, - .arch_vpmu_destroy = amd_vpmu_destroy, - .arch_vpmu_save = amd_vpmu_save, - .arch_vpmu_load = amd_vpmu_load, - .arch_vpmu_dump = amd_vpmu_dump -}; - -int svm_vpmu_initialise(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - uint8_t family = current_cpu_data.x86; - int ret = 0; - - /* vpmu enabled? */ - if ( vpmu_mode == XENPMU_MODE_OFF ) - return 0; - - switch ( family ) - { - case 0x10: - case 0x12: - case 0x14: - case 0x15: - case 0x16: - ret = amd_vpmu_initialise(v); - if ( !ret ) - vpmu->arch_vpmu_ops = &amd_vpmu_ops; - return ret; - } - - printk("VPMU: Initialization failed. " - "AMD processor family %d has not " - "been supported\n", family); - return -EINVAL; -} - diff --git a/xen/arch/x86/hvm/vlapic.c b/xen/arch/x86/hvm/vlapic.c index d954f4f..d49ed3a 100644 --- a/xen/arch/x86/hvm/vlapic.c +++ b/xen/arch/x86/hvm/vlapic.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile index 373b3d9..04a29ce 100644 --- a/xen/arch/x86/hvm/vmx/Makefile +++ b/xen/arch/x86/hvm/vmx/Makefile @@ -3,5 +3,4 @@ obj-y += intr.o obj-y += realmode.o obj-y += vmcs.o obj-y += vmx.o -obj-y += vpmu_core2.o obj-y += vvmx.o diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c deleted file mode 100644 index 5a07817..0000000 --- a/xen/arch/x86/hvm/vmx/vpmu_core2.c +++ /dev/null @@ -1,940 +0,0 @@ -/* - * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain. - * - * Copyright (c) 2007, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Author: Haitao Shan - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID - * instruction. - * cpuid 0xa - Architectural Performance Monitoring Leaf - * Register eax - */ -#define PMU_VERSION_SHIFT 0 /* Version ID */ -#define PMU_VERSION_BITS 8 /* 8 bits 0..7 */ -#define PMU_VERSION_MASK (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT) - -#define PMU_GENERAL_NR_SHIFT 8 /* Number of general pmu registers */ -#define PMU_GENERAL_NR_BITS 8 /* 8 bits 8..15 */ -#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT) - -#define PMU_GENERAL_WIDTH_SHIFT 16 /* Width of general pmu registers */ -#define PMU_GENERAL_WIDTH_BITS 8 /* 8 bits 16..23 */ -#define PMU_GENERAL_WIDTH_MASK (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT) -/* Register edx */ -#define PMU_FIXED_NR_SHIFT 0 /* Number of fixed pmu registers */ -#define PMU_FIXED_NR_BITS 5 /* 5 bits 0..4 */ -#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT) - -#define PMU_FIXED_WIDTH_SHIFT 5 /* Width of fixed pmu registers */ -#define PMU_FIXED_WIDTH_BITS 8 /* 8 bits 5..12 */ -#define PMU_FIXED_WIDTH_MASK (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT) - -/* Alias registers (0x4c1) for full-width writes to PMCs */ -#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0)) -static bool_t __read_mostly full_width_write; - -/* Intel-specific VPMU features */ -#define VPMU_CPU_HAS_DS 0x100 /* Has Debug Store */ -#define VPMU_CPU_HAS_BTS 0x200 /* Has Branch Trace Store */ - -/* - * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed - * counters. 4 bits for every counter. - */ -#define FIXED_CTR_CTRL_BITS 4 -#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1) - -/* Number of general-purpose and fixed performance counters */ -static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt; - -/* - * QUIRK to workaround an issue on various family 6 cpus. - * The issue leads to endless PMC interrupt loops on the processor. - * If the interrupt handler is running and a pmc reaches the value 0, this - * value remains forever and it triggers immediately a new interrupt after - * finishing the handler. - * A workaround is to read all flagged counters and if the value is 0 write - * 1 (or another value != 0) into it. - * There exist no errata and the real cause of this behaviour is unknown. - */ -bool_t __read_mostly is_pmc_quirk; - -static void check_pmc_quirk(void) -{ - if ( current_cpu_data.x86 == 6 ) - is_pmc_quirk = 1; - else - is_pmc_quirk = 0; -} - -static void handle_pmc_quirk(u64 msr_content) -{ - int i; - u64 val; - - if ( !is_pmc_quirk ) - return; - - val = msr_content; - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - if ( val & 0x1 ) - { - u64 cnt; - rdmsrl(MSR_P6_PERFCTR0 + i, cnt); - if ( cnt == 0 ) - wrmsrl(MSR_P6_PERFCTR0 + i, 1); - } - val >>= 1; - } - val = msr_content >> 32; - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - if ( val & 0x1 ) - { - u64 cnt; - rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt); - if ( cnt == 0 ) - wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1); - } - val >>= 1; - } -} - -/* - * Read the number of general counters via CPUID.EAX[0xa].EAX[8..15] - */ -static int core2_get_arch_pmc_count(void) -{ - u32 eax; - - eax = cpuid_eax(0xa); - return ( (eax & PMU_GENERAL_NR_MASK) >> PMU_GENERAL_NR_SHIFT ); -} - -/* - * Read the number of fixed counters via CPUID.EDX[0xa].EDX[0..4] - */ -static int core2_get_fixed_pmc_count(void) -{ - u32 eax; - - eax = cpuid_eax(0xa); - return ( (eax & PMU_FIXED_NR_MASK) >> PMU_FIXED_NR_SHIFT ); -} - -/* edx bits 5-12: Bit width of fixed-function performance counters */ -static int core2_get_bitwidth_fix_count(void) -{ - u32 edx; - - edx = cpuid_edx(0xa); - return ( (edx & PMU_FIXED_WIDTH_MASK) >> PMU_FIXED_WIDTH_SHIFT ); -} - -static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index) -{ - int i; - u32 msr_index_pmc; - - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - if ( msr_index == MSR_CORE_PERF_FIXED_CTR0 + i ) - { - *type = MSR_TYPE_COUNTER; - *index = i; - return 1; - } - } - - if ( (msr_index == MSR_CORE_PERF_FIXED_CTR_CTRL ) || - (msr_index == MSR_IA32_DS_AREA) || - (msr_index == MSR_IA32_PEBS_ENABLE) ) - { - *type = MSR_TYPE_CTRL; - return 1; - } - - if ( (msr_index == MSR_CORE_PERF_GLOBAL_CTRL) || - (msr_index == MSR_CORE_PERF_GLOBAL_STATUS) || - (msr_index == MSR_CORE_PERF_GLOBAL_OVF_CTRL) ) - { - *type = MSR_TYPE_GLOBAL; - return 1; - } - - msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK; - if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) && - (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) ) - { - *type = MSR_TYPE_ARCH_COUNTER; - *index = msr_index_pmc - MSR_IA32_PERFCTR0; - return 1; - } - - if ( (msr_index >= MSR_P6_EVNTSEL0) && - (msr_index < (MSR_P6_EVNTSEL0 + arch_pmc_cnt)) ) - { - *type = MSR_TYPE_ARCH_CTRL; - *index = msr_index - MSR_P6_EVNTSEL0; - return 1; - } - - return 0; -} - -#define msraddr_to_bitpos(x) (((x)&0xffff) + ((x)>>31)*0x2000) -static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap) -{ - int i; - - /* Allow Read/Write PMU Counters MSR Directly. */ - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), - msr_bitmap + 0x800/BYTES_PER_LONG); - - if ( full_width_write ) - { - clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - } - - /* Allow Read PMU Non-global Controls Directly. */ - for ( i = 0; i < arch_pmc_cnt; i++ ) - clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0 + i), msr_bitmap); - - clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap); - clear_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap); -} - -static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap) -{ - int i; - - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), - msr_bitmap + 0x800/BYTES_PER_LONG); - - if ( full_width_write ) - { - set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), - msr_bitmap + 0x800/BYTES_PER_LONG); - } - } - - for ( i = 0; i < arch_pmc_cnt; i++ ) - set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0 + i), msr_bitmap); - - set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap); - set_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap); -} - -static inline void __core2_vpmu_save(struct vcpu *v) -{ - int i; - struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context; - uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); - struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = - vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); - - for ( i = 0; i < fixed_pmc_cnt; i++ ) - rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]); - for ( i = 0; i < arch_pmc_cnt; i++ ) - rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter); - - if ( !has_hvm_container_domain(v->domain) ) - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status); -} - -/* Must be NMI-safe */ -static int core2_vpmu_save(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !has_hvm_container_domain(v->domain) ) - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - if ( !vpmu_is_set_all(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) ) - return 0; - - __core2_vpmu_save(v); - - /* Unset PMU MSR bitmap to trap lazy load. */ - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && cpu_has_vmx_msr_bitmap - && has_hvm_container_domain(v->domain) ) - core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); - - return 1; -} - -static inline void __core2_vpmu_load(struct vcpu *v) -{ - unsigned int i, pmc_start; - struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context; - uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); - struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = - vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); - - for ( i = 0; i < fixed_pmc_cnt; i++ ) - wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]); - - if ( full_width_write ) - pmc_start = MSR_IA32_A_PERFCTR0; - else - pmc_start = MSR_IA32_PERFCTR0; - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter); - wrmsrl(MSR_P6_EVNTSEL0 + i, xen_pmu_cntr_pair[i].control); - } - - wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl); - wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area); - wrmsrl(MSR_IA32_PEBS_ENABLE, core2_vpmu_cxt->pebs_enable); - - if ( !has_hvm_container_domain(v->domain) ) - { - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl); - core2_vpmu_cxt->global_ovf_ctrl = 0; - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl); - } -} - -static void core2_vpmu_load(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - return; - - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - - __core2_vpmu_load(v); -} - -static int core2_vpmu_alloc_resource(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_intel_ctxt *core2_vpmu_cxt; - - if ( !is_pv_domain(v->domain) ) - { - if ( !acquire_pmu_ownership(PMU_OWNER_HVM) ) - return 0; - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) - goto out_err; - - if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) - goto out_err; - vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - core2_vpmu_cxt = xzalloc_bytes(sizeof(struct xen_pmu_intel_ctxt) + - sizeof(uint64_t) * fixed_pmc_cnt + - sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt); - if ( !core2_vpmu_cxt ) - goto out_err; - } - else - { - core2_vpmu_cxt = &v->arch.vpmu.xenpmu_data->pmu.c.intel; - vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); - } - - core2_vpmu_cxt->fixed_counters = sizeof(struct xen_pmu_intel_ctxt); - core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters + - sizeof(uint64_t) * fixed_pmc_cnt; - - vpmu->context = (void *)core2_vpmu_cxt; - - vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); - - return 1; - -out_err: - vmx_rm_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL); - vmx_rm_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL); - release_pmu_ownship(PMU_OWNER_HVM); - - printk("Failed to allocate VPMU resources for domain %u vcpu %u\n", - v->vcpu_id, v->domain->domain_id); - - return 0; -} - -static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( !is_core2_vpmu_msr(msr_index, type, index) ) - return 0; - - if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) && - !core2_vpmu_alloc_resource(current) ) - return 0; - - /* Do the lazy load staff. */ - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - { - __core2_vpmu_load(current); - vpmu_set(vpmu, VPMU_CONTEXT_LOADED); - if ( cpu_has_vmx_msr_bitmap && has_hvm_container_domain(current->domain) ) - core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap); - } - return 1; -} - -static void inject_trap(struct vcpu *v, unsigned int trapno) -{ - if ( has_hvm_container_domain(v->domain) ) - hvm_inject_hw_exception(trapno, 0); - else - send_guest_trap(v->domain, v->vcpu_id, trapno); -} - -static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) -{ - u64 global_ctrl, non_global_ctrl; - unsigned pmu_enable = 0; - int i, tmp; - int type = -1, index = -1; - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL; - - if ( !core2_vpmu_msr_common_check(msr, &type, &index) ) - { - /* Special handling for BTS */ - if ( msr == MSR_IA32_DEBUGCTLMSR ) - { - uint64_t supported = IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS | - IA32_DEBUGCTLMSR_BTINT; - - if ( cpu_has(¤t_cpu_data, X86_FEATURE_DSCPL) ) - supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS | - IA32_DEBUGCTLMSR_BTS_OFF_USR; - if ( msr_content & supported ) - { - if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) - return 1; - gdprintk(XENLOG_WARNING, "Debug Store is not supported on this cpu\n"); - inject_trap(v, TRAP_gp_fault); - return 0; - } - } - return 0; - } - - core2_vpmu_cxt = vpmu->context; - switch ( msr ) - { - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - core2_vpmu_cxt->global_status &= ~msr_content; - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); - return 1; - case MSR_CORE_PERF_GLOBAL_STATUS: - gdprintk(XENLOG_INFO, "Can not write readonly MSR: " - "MSR_PERF_GLOBAL_STATUS(0x38E)!\n"); - inject_trap(v, TRAP_gp_fault); - return 1; - case MSR_IA32_PEBS_ENABLE: - if ( msr_content & 1 ) - gdprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, " - "which is not supported.\n"); - core2_vpmu_cxt->pebs_enable = msr_content; - return 1; - case MSR_IA32_DS_AREA: - if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) - { - if ( !is_canonical_address(msr_content) ) - { - gdprintk(XENLOG_WARNING, - "Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n", - msr_content); - inject_trap(v, TRAP_gp_fault); - return 1; - } - core2_vpmu_cxt->ds_area = msr_content; - break; - } - gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n"); - return 1; - case MSR_CORE_PERF_GLOBAL_CTRL: - global_ctrl = msr_content; - for ( i = 0; i < arch_pmc_cnt; i++ ) - { - rdmsrl(MSR_P6_EVNTSEL0+i, non_global_ctrl); - pmu_enable += global_ctrl & (non_global_ctrl >> 22) & 1; - global_ctrl >>= 1; - } - - rdmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, non_global_ctrl); - global_ctrl = msr_content >> 32; - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - pmu_enable += (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 1 : 0); - non_global_ctrl >>= FIXED_CTR_CTRL_BITS; - global_ctrl >>= 1; - } - core2_vpmu_cxt->global_ctrl = msr_content; - break; - case MSR_CORE_PERF_FIXED_CTR_CTRL: - non_global_ctrl = msr_content; - if ( has_hvm_container_domain(v->domain) ) - vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl); - else - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); - global_ctrl >>= 32; - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - pmu_enable += (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 1 : 0); - non_global_ctrl >>= 4; - global_ctrl >>= 1; - } - core2_vpmu_cxt->fixed_ctrl = msr_content; - break; - default: - tmp = msr - MSR_P6_EVNTSEL0; - if ( tmp >= 0 && tmp < arch_pmc_cnt ) - { - struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = - vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); - - if ( has_hvm_container_domain(v->domain) ) - vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl); - else - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); - xen_pmu_cntr_pair[tmp].control = msr_content; - for ( i = 0; i < arch_pmc_cnt && !pmu_enable; i++ ) - pmu_enable += (global_ctrl >> i) & - (xen_pmu_cntr_pair[i].control >> 22) & 1; - } - } - - pmu_enable += (core2_vpmu_cxt->ds_area != 0); - if ( pmu_enable ) - vpmu_set(vpmu, VPMU_RUNNING); - else - vpmu_reset(vpmu, VPMU_RUNNING); - - if ( type != MSR_TYPE_GLOBAL ) - { - u64 mask; - int inject_gp = 0; - switch ( type ) - { - case MSR_TYPE_ARCH_CTRL: /* MSR_P6_EVNTSEL[0,...] */ - mask = ~((1ull << 32) - 1); - if (msr_content & mask) - inject_gp = 1; - break; - case MSR_TYPE_CTRL: /* IA32_FIXED_CTR_CTRL */ - if ( msr == MSR_IA32_DS_AREA ) - break; - /* 4 bits per counter, currently 3 fixed counters implemented. */ - mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1); - if (msr_content & mask) - inject_gp = 1; - break; - case MSR_TYPE_COUNTER: /* IA32_FIXED_CTR[0-2] */ - mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1); - if (msr_content & mask) - inject_gp = 1; - break; - } - - if (inject_gp) - inject_trap(v, TRAP_gp_fault); - else - wrmsrl(msr, msr_content); - } - else - { - if ( has_hvm_container_domain(v->domain) ) - vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); - else - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); - } - - return 1; -} - -static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - int type = -1, index = -1; - struct vcpu *v = current; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL; - - if ( core2_vpmu_msr_common_check(msr, &type, &index) ) - { - core2_vpmu_cxt = vpmu->context; - switch ( msr ) - { - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - *msr_content = 0; - break; - case MSR_CORE_PERF_GLOBAL_STATUS: - *msr_content = core2_vpmu_cxt->global_status; - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - if ( has_hvm_container_domain(v->domain) ) - vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); - else - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content); - break; - default: - rdmsrl(msr, *msr_content); - } - } - else - { - /* Extension for BTS */ - if ( msr == MSR_IA32_MISC_ENABLE ) - { - if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) - *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; - } - else - return 0; - } - - return 1; -} - -static void core2_vpmu_do_cpuid(unsigned int input, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - if (input == 0x1) - { - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) - { - /* Switch on the 'Debug Store' feature in CPUID.EAX[1]:EDX[21] */ - *edx |= cpufeat_mask(X86_FEATURE_DS); - if ( cpu_has(¤t_cpu_data, X86_FEATURE_DTES64) ) - *ecx |= cpufeat_mask(X86_FEATURE_DTES64); - if ( cpu_has(¤t_cpu_data, X86_FEATURE_DSCPL) ) - *ecx |= cpufeat_mask(X86_FEATURE_DSCPL); - } - } -} - -/* Dump vpmu info on console, called in the context of keyhandler 'q'. */ -static void core2_vpmu_dump(const struct vcpu *v) -{ - const struct vpmu_struct *vpmu = vcpu_vpmu(v); - int i; - const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL; - u64 val; - uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); - struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = - vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) - { - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - printk(" vPMU loaded\n"); - else - printk(" vPMU allocated\n"); - return; - } - - printk(" vPMU running\n"); - core2_vpmu_cxt = vpmu->context; - - /* Print the contents of the counter and its configuration msr. */ - for ( i = 0; i < arch_pmc_cnt; i++ ) - printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n", - i, xen_pmu_cntr_pair[i].counter, xen_pmu_cntr_pair[i].control); - - /* - * The configuration of the fixed counter is 4 bits each in the - * MSR_CORE_PERF_FIXED_CTR_CTRL. - */ - val = core2_vpmu_cxt->fixed_ctrl; - for ( i = 0; i < fixed_pmc_cnt; i++ ) - { - printk(" fixed_%d: 0x%016lx ctrl: %#lx\n", - i, fixed_counters[i], - val & FIXED_CTR_CTRL_MASK); - val >>= FIXED_CTR_CTRL_BITS; - } -} - -static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs) -{ - struct vcpu *v = current; - u64 msr_content; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context; - - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content); - if ( msr_content ) - { - if ( is_pmc_quirk ) - handle_pmc_quirk(msr_content); - core2_vpmu_cxt->global_status |= msr_content; - msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1); - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); - } - else - { - /* No PMC overflow but perhaps a Trace Message interrupt. */ - __vmread(GUEST_IA32_DEBUGCTL, &msr_content); - if ( !(msr_content & IA32_DEBUGCTLMSR_TR) ) - return 0; - } - - return 1; -} - -static int core2_vpmu_initialise(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - u64 msr_content; - struct cpuinfo_x86 *c = ¤t_cpu_data; - - if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) ) - goto func_out; - /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */ - if ( cpu_has(c, X86_FEATURE_DS) ) - { - if ( !cpu_has(c, X86_FEATURE_DTES64) ) - { - printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area" - " - Debug Store disabled for d%d:v%d\n", - v->domain->domain_id, v->vcpu_id); - goto func_out; - } - vpmu_set(vpmu, VPMU_CPU_HAS_DS); - rdmsrl(MSR_IA32_MISC_ENABLE, msr_content); - if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL ) - { - /* If BTS_UNAVAIL is set reset the DS feature. */ - vpmu_reset(vpmu, VPMU_CPU_HAS_DS); - printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL" - " - Debug Store disabled for d%d:v%d\n", - v->domain->domain_id, v->vcpu_id); - } - else - { - vpmu_set(vpmu, VPMU_CPU_HAS_BTS); - if ( !cpu_has(c, X86_FEATURE_DSCPL) ) - printk(XENLOG_G_INFO - "vpmu: CPU doesn't support CPL-Qualified BTS\n"); - printk("******************************************************\n"); - printk("** WARNING: Emulation of BTS Feature is switched on **\n"); - printk("** Using this processor feature in a virtualized **\n"); - printk("** environment is not 100%% safe. **\n"); - printk("** Setting the DS buffer address with wrong values **\n"); - printk("** may lead to hypervisor hangs or crashes. **\n"); - printk("** It is NOT recommended for production use! **\n"); - printk("******************************************************\n"); - } - } -func_out: - - arch_pmc_cnt = core2_get_arch_pmc_count(); - fixed_pmc_cnt = core2_get_fixed_pmc_count(); - check_pmc_quirk(); - - /* PV domains can allocate resources immediately */ - if ( !has_hvm_container_domain(v->domain) && !core2_vpmu_alloc_resource(v) ) - return 1; - - return 0; -} - -static void core2_vpmu_destroy(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - - if ( has_hvm_container_domain(v->domain) ) - { - xfree(vpmu->context); - if ( cpu_has_vmx_msr_bitmap ) - core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); - } - - release_pmu_ownship(PMU_OWNER_HVM); - vpmu->context = NULL; - vpmu_clear(vpmu); -} - -struct arch_vpmu_ops core2_vpmu_ops = { - .do_wrmsr = core2_vpmu_do_wrmsr, - .do_rdmsr = core2_vpmu_do_rdmsr, - .do_interrupt = core2_vpmu_do_interrupt, - .do_cpuid = core2_vpmu_do_cpuid, - .arch_vpmu_destroy = core2_vpmu_destroy, - .arch_vpmu_save = core2_vpmu_save, - .arch_vpmu_load = core2_vpmu_load, - .arch_vpmu_dump = core2_vpmu_dump -}; - -static void core2_no_vpmu_do_cpuid(unsigned int input, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* - * As in this case the vpmu is not enabled reset some bits in the - * architectural performance monitoring related part. - */ - if ( input == 0xa ) - { - *eax &= ~PMU_VERSION_MASK; - *eax &= ~PMU_GENERAL_NR_MASK; - *eax &= ~PMU_GENERAL_WIDTH_MASK; - - *edx &= ~PMU_FIXED_NR_MASK; - *edx &= ~PMU_FIXED_WIDTH_MASK; - } -} - -/* - * If its a vpmu msr set it to 0. - */ -static int core2_no_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - int type = -1, index = -1; - if ( !is_core2_vpmu_msr(msr, &type, &index) ) - return 0; - *msr_content = 0; - return 1; -} - -/* - * These functions are used in case vpmu is not enabled. - */ -struct arch_vpmu_ops core2_no_vpmu_ops = { - .do_rdmsr = core2_no_vpmu_do_rdmsr, - .do_cpuid = core2_no_vpmu_do_cpuid, -}; - -int vmx_vpmu_initialise(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - uint8_t family = current_cpu_data.x86; - uint8_t cpu_model = current_cpu_data.x86_model; - int ret = 0; - - vpmu->arch_vpmu_ops = &core2_no_vpmu_ops; - if ( vpmu_mode == XENPMU_MODE_OFF ) - return 0; - - if ( family == 6 ) - { - u64 caps; - - rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps); - full_width_write = (caps >> 13) & 1; - - switch ( cpu_model ) - { - /* Core2: */ - case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 0x17: /* 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 0x1d: /* six-core 45 nm xeon "Dunnington" */ - - case 0x2a: /* SandyBridge */ - case 0x2d: /* SandyBridge, "Romley-EP" */ - - /* Nehalem: */ - case 0x1a: /* 45 nm nehalem, "Bloomfield" */ - case 0x1e: /* 45 nm nehalem, "Lynnfield", "Clarksfield", "Jasper Forest" */ - case 0x2e: /* 45 nm nehalem-ex, "Beckton" */ - - /* Westmere: */ - case 0x25: /* 32 nm nehalem, "Clarkdale", "Arrandale" */ - case 0x2c: /* 32 nm nehalem, "Gulftown", "Westmere-EP" */ - case 0x27: /* 32 nm Westmere-EX */ - - case 0x3a: /* IvyBridge */ - case 0x3e: /* IvyBridge EP */ - - /* Haswell: */ - case 0x3c: - case 0x3f: - case 0x45: - case 0x46: - ret = core2_vpmu_initialise(v); - if ( !ret ) - vpmu->arch_vpmu_ops = &core2_vpmu_ops; - return ret; - } - } - - printk("VPMU: Initialization failed. " - "Intel processor family %d model %d has not " - "been supported\n", family, cpu_model); - return -EINVAL; -} - diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c deleted file mode 100644 index 3645e4c..0000000 --- a/xen/arch/x86/hvm/vpmu.c +++ /dev/null @@ -1,720 +0,0 @@ -/* - * vpmu.c: PMU virtualization for HVM domain. - * - * Copyright (c) 2007, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Author: Haitao Shan - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * "vpmu" : vpmu generally enabled - * "vpmu=off" : vpmu generally disabled - * "vpmu=bts" : vpmu enabled and Intel BTS feature switched on. - */ -uint64_t __read_mostly vpmu_mode = XENPMU_MODE_OFF; -uint64_t __read_mostly vpmu_features = 0; -static void parse_vpmu_param(char *s); -custom_param("vpmu", parse_vpmu_param); - -static DEFINE_PER_CPU(struct vcpu *, last_vcpu); -static DEFINE_PER_CPU(struct vcpu *, sampled_vcpu); - -static uint32_t __read_mostly vpmu_interrupt_type = PMU_APIC_VECTOR; - -static void __init parse_vpmu_param(char *s) -{ - char *ss; - - vpmu_mode = XENPMU_MODE_ON; - if (*s == '\0') - return; - - do { - ss = strchr(s, ','); - if ( ss ) - *ss = '\0'; - - switch ( parse_bool(s) ) - { - case 0: - vpmu_mode = XENPMU_MODE_OFF; - return; - case -1: - if ( !strcmp(s, "nmi") ) - vpmu_interrupt_type = APIC_DM_NMI; - else if ( !strcmp(s, "bts") ) - vpmu_features |= XENPMU_FEATURE_INTEL_BTS; - else if ( !strcmp(s, "priv") ) - { - vpmu_mode &= ~XENPMU_MODE_ON; - vpmu_mode |= XENPMU_MODE_PRIV; - } - else - { - printk("VPMU: unknown flag: %s - vpmu disabled!\n", s); - vpmu_mode = XENPMU_MODE_OFF; - return; - } - default: - break; - } - - s = ss + 1; - } while ( ss ); -} - -void vpmu_lvtpc_update(uint32_t val) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | (val & APIC_LVT_MASKED); - - /* Postpone APIC updates for PV guests if PMU interrupt is pending */ - if ( !has_hvm_container_domain(current->domain) || - !(current->arch.vpmu.xenpmu_data && - current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED) ) - apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); -} - -static void vpmu_send_nmi(struct vcpu *v) -{ - struct vlapic *vlapic; - u32 vlapic_lvtpc; - unsigned char int_vec; - - ASSERT( is_hvm_vcpu(v) ); - - vlapic = vcpu_vlapic(v); - if ( !is_vlapic_lvtpc_enabled(vlapic) ) - return; - - vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC); - int_vec = vlapic_lvtpc & APIC_VECTOR_MASK; - - if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED ) - vlapic_set_irq(vcpu_vlapic(v), int_vec, 0); - else - v->nmi_pending = 1; -} - -int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( (vpmu_mode == XENPMU_MODE_OFF) || - ((vpmu_mode & XENPMU_MODE_PRIV) && !is_control_domain(current->domain)) ) - return 0; - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr ) - { - int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content); - - /* - * We may have received a PMU interrupt during WRMSR handling - * and since do_wrmsr may load VPMU context we should save - * (and unload) it again. - */ - if ( !has_hvm_container_domain(current->domain) && - (current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED) ) - { - vpmu_set(vpmu, VPMU_CONTEXT_SAVE); - vpmu->arch_vpmu_ops->arch_vpmu_save(current); - vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); - } - return ret; - } - return 0; -} - -int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( (vpmu_mode == XENPMU_MODE_OFF) || - ((vpmu_mode & XENPMU_MODE_PRIV) && !is_control_domain(current->domain)) ) - return 0; - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr ) - { - int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content); - - if ( !has_hvm_container_domain(current->domain) && - (current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED) ) - { - vpmu_set(vpmu, VPMU_CONTEXT_SAVE); - vpmu->arch_vpmu_ops->arch_vpmu_save(current); - vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); - } - return ret; - } - return 0; -} - -/* This routine may be called in NMI context */ -int vpmu_do_interrupt(struct cpu_user_regs *regs) -{ - struct vcpu *v = current; - struct vpmu_struct *vpmu; - - /* dom0 will handle this interrupt */ - if ( (vpmu_mode & XENPMU_MODE_PRIV) || - (v->domain->domain_id >= DOMID_FIRST_RESERVED) ) - v = dom0->vcpu[smp_processor_id() % dom0->max_vcpus]; - - vpmu = vcpu_vpmu(v); - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return 0; - - if ( !is_hvm_domain(v->domain) || (vpmu_mode & XENPMU_MODE_PRIV) ) - { - /* PV(H) guest or dom0 is doing system profiling */ - struct cpu_user_regs *gregs; - int err; - - if ( v->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED ) - return 1; - - if ( is_pvh_domain(current->domain) && !(vpmu_mode & XENPMU_MODE_PRIV) && - !vpmu->arch_vpmu_ops->do_interrupt(regs) ) - return 0; - - /* PV guest will be reading PMU MSRs from xenpmu_data */ - vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); - err = vpmu->arch_vpmu_ops->arch_vpmu_save(v); - vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); - - if ( !is_hvm_domain(current->domain) ) - { - /* Store appropriate registers in xenpmu_data */ - if ( is_pv_32bit_domain(current->domain) ) - { - gregs = guest_cpu_user_regs(); - - if ( (vpmu_mode & XENPMU_MODE_PRIV) && - !is_pv_32bit_domain(v->domain) ) - memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs, - gregs, sizeof(struct cpu_user_regs)); - else - { - /* - * 32-bit dom0 cannot process Xen's addresses (which are - * 64 bit) and therefore we treat it the same way as a - * non-priviledged PV 32-bit domain. - */ - - struct compat_cpu_user_regs *cmp; - - cmp = (struct compat_cpu_user_regs *) - &v->arch.vpmu.xenpmu_data->pmu.r.regs; - XLAT_cpu_user_regs(cmp, gregs); - memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs, - &cmp, sizeof(struct compat_cpu_user_regs)); - } - } - else if ( !is_control_domain(current->domain) && - !is_idle_vcpu(current) ) - { - /* PV(H) guest */ - gregs = guest_cpu_user_regs(); - memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs, - gregs, sizeof(struct cpu_user_regs)); - } - else - memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs, - regs, sizeof(struct cpu_user_regs)); - - gregs = &v->arch.vpmu.xenpmu_data->pmu.r.regs; - if ( !is_pvh_domain(current->domain) ) - gregs->cs = (current->arch.flags & TF_kernel_mode) ? 0 : 0x3; - else if ( !(vpmu_interrupt_type & APIC_DM_NMI) ) - { - struct segment_register seg_cs; - - hvm_get_segment_register(current, x86_seg_cs, &seg_cs); - gregs->cs = seg_cs.attr.fields.dpl; - } - } - else - { - /* HVM guest */ - struct segment_register cs; - - gregs = guest_cpu_user_regs(); - memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs, - gregs, sizeof(struct cpu_user_regs)); - - /* This is unsafe in NMI context, we'll do it in softint handler */ - if ( !(vpmu_interrupt_type & APIC_DM_NMI ) ) - { - hvm_get_segment_register(current, x86_seg_cs, &cs); - gregs = &v->arch.vpmu.xenpmu_data->pmu.r.regs; - gregs->cs = cs.attr.fields.dpl; - } - } - - v->arch.vpmu.xenpmu_data->domain_id = current->domain->domain_id; - v->arch.vpmu.xenpmu_data->vcpu_id = current->vcpu_id; - v->arch.vpmu.xenpmu_data->pcpu_id = smp_processor_id(); - - if ( !is_pvh_domain(current->domain) || (vpmu_mode & XENPMU_MODE_PRIV) ) - v->arch.vpmu.xenpmu_data->pmu_flags |= PMU_CACHED; - apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED); - vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED; - - if ( vpmu_interrupt_type & APIC_DM_NMI ) - { - per_cpu(sampled_vcpu, smp_processor_id()) = current; - raise_softirq(PMU_SOFTIRQ); - } - else - send_guest_vcpu_virq(v, VIRQ_XENPMU); - - return 1; - } - - if ( vpmu->arch_vpmu_ops ) - { - if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ) - return 0; - - if ( vpmu_interrupt_type & APIC_DM_NMI ) - { - per_cpu(sampled_vcpu, smp_processor_id()) = current; - raise_softirq(PMU_SOFTIRQ); - } - else - vpmu_send_nmi(v); - - return 1; - } - - return 0; -} - -void vpmu_do_cpuid(unsigned int input, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(current); - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_cpuid ) - vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx); -} - -static void vpmu_save_force(void *arg) -{ - struct vcpu *v = (struct vcpu *)arg; - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - return; - - if ( vpmu->arch_vpmu_ops ) - (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v); - - vpmu_reset(vpmu, VPMU_CONTEXT_SAVE); - - per_cpu(last_vcpu, smp_processor_id()) = NULL; -} - -void vpmu_save(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - int pcpu = smp_processor_id(); - - if ( !vpmu_is_set_all(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) ) - return; - - vpmu->last_pcpu = pcpu; - per_cpu(last_vcpu, pcpu) = v; - - if ( vpmu->arch_vpmu_ops ) - if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) ) - vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); - - apic_write(APIC_LVTPC, vpmu_interrupt_type | APIC_LVT_MASKED); -} - -void vpmu_load(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - int pcpu = smp_processor_id(); - struct vcpu *prev = NULL; - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - return; - - /* First time this VCPU is running here */ - if ( vpmu->last_pcpu != pcpu ) - { - /* - * Get the context from last pcpu that we ran on. Note that if another - * VCPU is running there it must have saved this VPCU's context before - * startig to run (see below). - * There should be no race since remote pcpu will disable interrupts - * before saving the context. - */ - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - { - vpmu_set(vpmu, VPMU_CONTEXT_SAVE); - on_selected_cpus(cpumask_of(vpmu->last_pcpu), - vpmu_save_force, (void *)v, 1); - vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); - } - } - - /* Prevent forced context save from remote CPU */ - local_irq_disable(); - - prev = per_cpu(last_vcpu, pcpu); - - if ( prev != v && prev ) - { - vpmu = vcpu_vpmu(prev); - - /* Someone ran here before us */ - vpmu_set(vpmu, VPMU_CONTEXT_SAVE); - vpmu_save_force(prev); - vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); - - vpmu = vcpu_vpmu(v); - } - - local_irq_enable(); - - /* Only when PMU is counting, we load PMU context immediately. */ - if ( !vpmu_is_set(vpmu, VPMU_RUNNING) || - (!has_hvm_container_domain(v->domain) && vpmu->xenpmu_data->pmu_flags & PMU_CACHED) ) - return; - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load ) - { - apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc); - /* Arch code needs to set VPMU_CONTEXT_LOADED */ - vpmu->arch_vpmu_ops->arch_vpmu_load(v); - } -} - -void vpmu_initialise(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - uint8_t vendor = current_cpu_data.x86_vendor; - - if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) - vpmu_destroy(v); - vpmu_clear(vpmu); - vpmu->context = NULL; - - switch ( vendor ) - { - case X86_VENDOR_AMD: - if ( svm_vpmu_initialise(v) != 0 ) - vpmu_mode = XENPMU_MODE_OFF; - return; - - case X86_VENDOR_INTEL: - if ( vmx_vpmu_initialise(v) != 0 ) - vpmu_mode = XENPMU_MODE_OFF; - return; - - default: - printk("VPMU: Initialization failed. " - "Unknown CPU vendor %d\n", vendor); - vpmu_mode = XENPMU_MODE_OFF; - return; - } -} - -void vpmu_destroy(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy ) - { - /* Unload VPMU first. This will stop counters */ - on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu), - vpmu_save_force, (void *)v, 1); - - vpmu->arch_vpmu_ops->arch_vpmu_destroy(v); - } -} - -/* Dump some vpmu informations on console. Used in keyhandler dump_domains(). */ -void vpmu_dump(struct vcpu *v) -{ - struct vpmu_struct *vpmu = vcpu_vpmu(v); - - if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump ) - vpmu->arch_vpmu_ops->arch_vpmu_dump(v); -} - -/* Unload VPMU contexts */ -static void vpmu_unload_all(void) -{ - struct domain *d; - struct vcpu *v; - struct vpmu_struct *vpmu; - - for_each_domain(d) - { - for_each_vcpu ( d, v ) - { - if ( v != current ) - vcpu_pause(v); - vpmu = vcpu_vpmu(v); - - if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) - { - if ( v != current ) - vcpu_unpause(v); - continue; - } - - vpmu_set(vpmu, VPMU_CONTEXT_SAVE); - on_selected_cpus(cpumask_of(vpmu->last_pcpu), - vpmu_save_force, (void *)v, 1); - vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); - - if ( v != current ) - vcpu_unpause(v); - } - } -} - -/* Process the softirq set by PMU NMI handler */ -static void pmu_softnmi(void) -{ - struct cpu_user_regs *regs; - struct vcpu *v, *sampled = per_cpu(sampled_vcpu, smp_processor_id()); - - if ( (vpmu_mode & XENPMU_MODE_PRIV) || - (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) ) - v = dom0->vcpu[smp_processor_id() % dom0->max_vcpus]; - else - { - if ( is_hvm_domain(sampled->domain) ) - { - vpmu_send_nmi(sampled); - return; - } - v = sampled; - } - - regs = &v->arch.vpmu.xenpmu_data->pmu.r.regs; - if ( has_hvm_container_domain(sampled->domain) ) - { - struct segment_register cs; - - hvm_get_segment_register(sampled, x86_seg_cs, &cs); - regs->cs = cs.attr.fields.dpl; - } - - send_guest_vcpu_virq(v, VIRQ_XENPMU); -} - -int pmu_nmi_interrupt(struct cpu_user_regs *regs, int cpu) -{ - return vpmu_do_interrupt(regs); -} - -static int pvpmu_init(struct domain *d, xen_pmu_params_t *params) -{ - struct vcpu *v; - struct page_info *page; - uint64_t gfn = params->d.val; - static bool_t __read_mostly pvpmu_initted = 0; - - if ( params->vcpu < 0 || params->vcpu >= d->max_vcpus ) - return -EINVAL; - - page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC); - if ( !page ) - return -EINVAL; - - if ( !get_page_type(page, PGT_writable_page) ) - { - put_page(page); - return -EINVAL; - } - - v = d->vcpu[params->vcpu]; - v->arch.vpmu.xenpmu_data = __map_domain_page_global(page); - if ( !v->arch.vpmu.xenpmu_data ) - { - put_page_and_type(page); - return -EINVAL; - } - - if ( !pvpmu_initted ) - { - if (reserve_lapic_nmi() == 0) - set_nmi_callback(pmu_nmi_interrupt); - else - { - printk("Failed to reserve PMU NMI\n"); - put_page(page); - return -EBUSY; - } - open_softirq(PMU_SOFTIRQ, pmu_softnmi); - - pvpmu_initted = 1; - } - - vpmu_initialise(v); - - return 0; -} - -static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params) -{ - struct vcpu *v; - uint64_t mfn; - - if ( params->vcpu < 0 || params->vcpu >= d->max_vcpus ) - return; - - v = d->vcpu[params->vcpu]; - if (v != current) - vcpu_pause(v); - - if ( v->arch.vpmu.xenpmu_data ) - { - mfn = domain_page_map_to_mfn(v->arch.vpmu.xenpmu_data); - if ( mfn_valid(mfn) ) - { - unmap_domain_page_global(v->arch.vpmu.xenpmu_data); - put_page_and_type(mfn_to_page(mfn)); - } - } - vpmu_destroy(v); - - if (v != current) - vcpu_unpause(v); -} - -long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) -{ - int ret = -EINVAL; - xen_pmu_params_t pmu_params; - - switch ( op ) - { - case XENPMU_mode_set: - if ( !is_control_domain(current->domain) ) - return -EPERM; - - if ( copy_from_guest(&pmu_params, arg, 1) ) - return -EFAULT; - - if ( (pmu_params.d.val & ~(XENPMU_MODE_ON | XENPMU_MODE_PRIV)) || - ((pmu_params.d.val & XENPMU_MODE_ON) && - (pmu_params.d.val & XENPMU_MODE_PRIV)) ) - return -EINVAL; - - vpmu_mode = pmu_params.d.val; - - if ( (vpmu_mode == XENPMU_MODE_OFF) || (vpmu_mode & XENPMU_MODE_PRIV) ) - /* - * After this VPMU context will never be loaded during context - * switch. Because PMU MSR accesses load VPMU context we don't - * allow them when VPMU is off and, for non-provileged domains, - * when we are in privileged mode. (We do want these accesses to - * load VPMU context for control domain in this mode) - */ - vpmu_unload_all(); - - ret = 0; - break; - - case XENPMU_mode_get: - pmu_params.d.val = vpmu_mode; - pmu_params.v.version.maj = XENPMU_VER_MAJ; - pmu_params.v.version.min = XENPMU_VER_MIN; - if ( copy_to_guest(arg, &pmu_params, 1) ) - return -EFAULT; - ret = 0; - break; - - case XENPMU_feature_set: - if ( !is_control_domain(current->domain) ) - return -EPERM; - - if ( copy_from_guest(&pmu_params, arg, 1) ) - return -EFAULT; - - if ( pmu_params.d.val & ~XENPMU_FEATURE_INTEL_BTS ) - return -EINVAL; - - vpmu_features = pmu_params.d.val; - - ret = 0; - break; - - case XENPMU_feature_get: - pmu_params.d.val = vpmu_mode; - if ( copy_to_guest(arg, &pmu_params, 1) ) - return -EFAULT; - ret = 0; - break; - - case XENPMU_init: - if ( copy_from_guest(&pmu_params, arg, 1) ) - return -EFAULT; - ret = pvpmu_init(current->domain, &pmu_params); - break; - - case XENPMU_finish: - if ( copy_from_guest(&pmu_params, arg, 1) ) - return -EFAULT; - pvpmu_finish(current->domain, &pmu_params); - break; - - case XENPMU_lvtpc_set: - if ( current->arch.vpmu.xenpmu_data == NULL ) - return -EINVAL; - vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc); - ret = 0; - break; - case XENPMU_flush: - current->arch.vpmu.xenpmu_data->pmu_flags &= ~PMU_CACHED; - vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc); - vpmu_load(current); - ret = 0; - break; - } - - return ret; -} diff --git a/xen/arch/x86/oprofile/op_model_ppro.c b/xen/arch/x86/oprofile/op_model_ppro.c index 5aae2e7..bf5d9a5 100644 --- a/xen/arch/x86/oprofile/op_model_ppro.c +++ b/xen/arch/x86/oprofile/op_model_ppro.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "op_x86_model.h" #include "op_counter.h" diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 83ea479..7fb1d30 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -72,7 +72,7 @@ #include #include #include -#include +#include #include #include diff --git a/xen/arch/x86/vpmu.c b/xen/arch/x86/vpmu.c new file mode 100644 index 0000000..8c2723b --- /dev/null +++ b/xen/arch/x86/vpmu.c @@ -0,0 +1,720 @@ +/* + * vpmu.c: PMU virtualization for HVM domain. + * + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Haitao Shan + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * "vpmu" : vpmu generally enabled + * "vpmu=off" : vpmu generally disabled + * "vpmu=bts" : vpmu enabled and Intel BTS feature switched on. + */ +uint64_t __read_mostly vpmu_mode = XENPMU_MODE_OFF; +uint64_t __read_mostly vpmu_features = 0; +static void parse_vpmu_param(char *s); +custom_param("vpmu", parse_vpmu_param); + +static DEFINE_PER_CPU(struct vcpu *, last_vcpu); +static DEFINE_PER_CPU(struct vcpu *, sampled_vcpu); + +static uint32_t __read_mostly vpmu_interrupt_type = PMU_APIC_VECTOR; + +static void __init parse_vpmu_param(char *s) +{ + char *ss; + + vpmu_mode = XENPMU_MODE_ON; + if (*s == '\0') + return; + + do { + ss = strchr(s, ','); + if ( ss ) + *ss = '\0'; + + switch ( parse_bool(s) ) + { + case 0: + vpmu_mode = XENPMU_MODE_OFF; + return; + case -1: + if ( !strcmp(s, "nmi") ) + vpmu_interrupt_type = APIC_DM_NMI; + else if ( !strcmp(s, "bts") ) + vpmu_features |= XENPMU_FEATURE_INTEL_BTS; + else if ( !strcmp(s, "priv") ) + { + vpmu_mode &= ~XENPMU_MODE_ON; + vpmu_mode |= XENPMU_MODE_PRIV; + } + else + { + printk("VPMU: unknown flag: %s - vpmu disabled!\n", s); + vpmu_mode = XENPMU_MODE_OFF; + return; + } + default: + break; + } + + s = ss + 1; + } while ( ss ); +} + +void vpmu_lvtpc_update(uint32_t val) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | (val & APIC_LVT_MASKED); + + /* Postpone APIC updates for PV guests if PMU interrupt is pending */ + if ( !has_hvm_container_domain(current->domain) || + !(current->arch.vpmu.xenpmu_data && + current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED) ) + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc); +} + +static void vpmu_send_nmi(struct vcpu *v) +{ + struct vlapic *vlapic; + u32 vlapic_lvtpc; + unsigned char int_vec; + + ASSERT( is_hvm_vcpu(v) ); + + vlapic = vcpu_vlapic(v); + if ( !is_vlapic_lvtpc_enabled(vlapic) ) + return; + + vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC); + int_vec = vlapic_lvtpc & APIC_VECTOR_MASK; + + if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED ) + vlapic_set_irq(vcpu_vlapic(v), int_vec, 0); + else + v->nmi_pending = 1; +} + +int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( (vpmu_mode == XENPMU_MODE_OFF) || + ((vpmu_mode & XENPMU_MODE_PRIV) && !is_control_domain(current->domain)) ) + return 0; + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr ) + { + int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content); + + /* + * We may have received a PMU interrupt during WRMSR handling + * and since do_wrmsr may load VPMU context we should save + * (and unload) it again. + */ + if ( !has_hvm_container_domain(current->domain) && + (current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED) ) + { + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + vpmu->arch_vpmu_ops->arch_vpmu_save(current); + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + } + return ret; + } + return 0; +} + +int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( (vpmu_mode == XENPMU_MODE_OFF) || + ((vpmu_mode & XENPMU_MODE_PRIV) && !is_control_domain(current->domain)) ) + return 0; + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr ) + { + int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content); + + if ( !has_hvm_container_domain(current->domain) && + (current->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED) ) + { + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + vpmu->arch_vpmu_ops->arch_vpmu_save(current); + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + } + return ret; + } + return 0; +} + +/* This routine may be called in NMI context */ +int vpmu_do_interrupt(struct cpu_user_regs *regs) +{ + struct vcpu *v = current; + struct vpmu_struct *vpmu; + + /* dom0 will handle this interrupt */ + if ( (vpmu_mode & XENPMU_MODE_PRIV) || + (v->domain->domain_id >= DOMID_FIRST_RESERVED) ) + v = dom0->vcpu[smp_processor_id() % dom0->max_vcpus]; + + vpmu = vcpu_vpmu(v); + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return 0; + + if ( !is_hvm_domain(v->domain) || (vpmu_mode & XENPMU_MODE_PRIV) ) + { + /* PV(H) guest or dom0 is doing system profiling */ + struct cpu_user_regs *gregs; + int err; + + if ( v->arch.vpmu.xenpmu_data->pmu_flags & PMU_CACHED ) + return 1; + + if ( is_pvh_domain(current->domain) && !(vpmu_mode & XENPMU_MODE_PRIV) && + !vpmu->arch_vpmu_ops->do_interrupt(regs) ) + return 0; + + /* PV guest will be reading PMU MSRs from xenpmu_data */ + vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + err = vpmu->arch_vpmu_ops->arch_vpmu_save(v); + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED); + + if ( !is_hvm_domain(current->domain) ) + { + /* Store appropriate registers in xenpmu_data */ + if ( is_pv_32bit_domain(current->domain) ) + { + gregs = guest_cpu_user_regs(); + + if ( (vpmu_mode & XENPMU_MODE_PRIV) && + !is_pv_32bit_domain(v->domain) ) + memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs, + gregs, sizeof(struct cpu_user_regs)); + else + { + /* + * 32-bit dom0 cannot process Xen's addresses (which are + * 64 bit) and therefore we treat it the same way as a + * non-priviledged PV 32-bit domain. + */ + + struct compat_cpu_user_regs *cmp; + + cmp = (struct compat_cpu_user_regs *) + &v->arch.vpmu.xenpmu_data->pmu.r.regs; + XLAT_cpu_user_regs(cmp, gregs); + memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs, + &cmp, sizeof(struct compat_cpu_user_regs)); + } + } + else if ( !is_control_domain(current->domain) && + !is_idle_vcpu(current) ) + { + /* PV(H) guest */ + gregs = guest_cpu_user_regs(); + memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs, + gregs, sizeof(struct cpu_user_regs)); + } + else + memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs, + regs, sizeof(struct cpu_user_regs)); + + gregs = &v->arch.vpmu.xenpmu_data->pmu.r.regs; + if ( !is_pvh_domain(current->domain) ) + gregs->cs = (current->arch.flags & TF_kernel_mode) ? 0 : 0x3; + else if ( !(vpmu_interrupt_type & APIC_DM_NMI) ) + { + struct segment_register seg_cs; + + hvm_get_segment_register(current, x86_seg_cs, &seg_cs); + gregs->cs = seg_cs.attr.fields.dpl; + } + } + else + { + /* HVM guest */ + struct segment_register cs; + + gregs = guest_cpu_user_regs(); + memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs, + gregs, sizeof(struct cpu_user_regs)); + + /* This is unsafe in NMI context, we'll do it in softint handler */ + if ( !(vpmu_interrupt_type & APIC_DM_NMI ) ) + { + hvm_get_segment_register(current, x86_seg_cs, &cs); + gregs = &v->arch.vpmu.xenpmu_data->pmu.r.regs; + gregs->cs = cs.attr.fields.dpl; + } + } + + v->arch.vpmu.xenpmu_data->domain_id = current->domain->domain_id; + v->arch.vpmu.xenpmu_data->vcpu_id = current->vcpu_id; + v->arch.vpmu.xenpmu_data->pcpu_id = smp_processor_id(); + + if ( !is_pvh_domain(current->domain) || (vpmu_mode & XENPMU_MODE_PRIV) ) + v->arch.vpmu.xenpmu_data->pmu_flags |= PMU_CACHED; + apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED); + vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED; + + if ( vpmu_interrupt_type & APIC_DM_NMI ) + { + per_cpu(sampled_vcpu, smp_processor_id()) = current; + raise_softirq(PMU_SOFTIRQ); + } + else + send_guest_vcpu_virq(v, VIRQ_XENPMU); + + return 1; + } + + if ( vpmu->arch_vpmu_ops ) + { + if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ) + return 0; + + if ( vpmu_interrupt_type & APIC_DM_NMI ) + { + per_cpu(sampled_vcpu, smp_processor_id()) = current; + raise_softirq(PMU_SOFTIRQ); + } + else + vpmu_send_nmi(v); + + return 1; + } + + return 0; +} + +void vpmu_do_cpuid(unsigned int input, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_cpuid ) + vpmu->arch_vpmu_ops->do_cpuid(input, eax, ebx, ecx, edx); +} + +static void vpmu_save_force(void *arg) +{ + struct vcpu *v = (struct vcpu *)arg; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + return; + + if ( vpmu->arch_vpmu_ops ) + (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v); + + vpmu_reset(vpmu, VPMU_CONTEXT_SAVE); + + per_cpu(last_vcpu, smp_processor_id()) = NULL; +} + +void vpmu_save(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + int pcpu = smp_processor_id(); + + if ( !vpmu_is_set_all(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) ) + return; + + vpmu->last_pcpu = pcpu; + per_cpu(last_vcpu, pcpu) = v; + + if ( vpmu->arch_vpmu_ops ) + if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) ) + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); + + apic_write(APIC_LVTPC, vpmu_interrupt_type | APIC_LVT_MASKED); +} + +void vpmu_load(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + int pcpu = smp_processor_id(); + struct vcpu *prev = NULL; + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + /* First time this VCPU is running here */ + if ( vpmu->last_pcpu != pcpu ) + { + /* + * Get the context from last pcpu that we ran on. Note that if another + * VCPU is running there it must have saved this VPCU's context before + * startig to run (see below). + * There should be no race since remote pcpu will disable interrupts + * before saving the context. + */ + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + { + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + on_selected_cpus(cpumask_of(vpmu->last_pcpu), + vpmu_save_force, (void *)v, 1); + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); + } + } + + /* Prevent forced context save from remote CPU */ + local_irq_disable(); + + prev = per_cpu(last_vcpu, pcpu); + + if ( prev != v && prev ) + { + vpmu = vcpu_vpmu(prev); + + /* Someone ran here before us */ + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + vpmu_save_force(prev); + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); + + vpmu = vcpu_vpmu(v); + } + + local_irq_enable(); + + /* Only when PMU is counting, we load PMU context immediately. */ + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) || + (!has_hvm_container_domain(v->domain) && vpmu->xenpmu_data->pmu_flags & PMU_CACHED) ) + return; + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load ) + { + apic_write_around(APIC_LVTPC, vpmu->hw_lapic_lvtpc); + /* Arch code needs to set VPMU_CONTEXT_LOADED */ + vpmu->arch_vpmu_ops->arch_vpmu_load(v); + } +} + +void vpmu_initialise(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + uint8_t vendor = current_cpu_data.x86_vendor; + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + vpmu_destroy(v); + vpmu_clear(vpmu); + vpmu->context = NULL; + + switch ( vendor ) + { + case X86_VENDOR_AMD: + if ( svm_vpmu_initialise(v) != 0 ) + vpmu_mode = XENPMU_MODE_OFF; + return; + + case X86_VENDOR_INTEL: + if ( vmx_vpmu_initialise(v) != 0 ) + vpmu_mode = XENPMU_MODE_OFF; + return; + + default: + printk("VPMU: Initialization failed. " + "Unknown CPU vendor %d\n", vendor); + vpmu_mode = XENPMU_MODE_OFF; + return; + } +} + +void vpmu_destroy(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy ) + { + /* Unload VPMU first. This will stop counters */ + on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu), + vpmu_save_force, (void *)v, 1); + + vpmu->arch_vpmu_ops->arch_vpmu_destroy(v); + } +} + +/* Dump some vpmu informations on console. Used in keyhandler dump_domains(). */ +void vpmu_dump(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump ) + vpmu->arch_vpmu_ops->arch_vpmu_dump(v); +} + +/* Unload VPMU contexts */ +static void vpmu_unload_all(void) +{ + struct domain *d; + struct vcpu *v; + struct vpmu_struct *vpmu; + + for_each_domain(d) + { + for_each_vcpu ( d, v ) + { + if ( v != current ) + vcpu_pause(v); + vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + { + if ( v != current ) + vcpu_unpause(v); + continue; + } + + vpmu_set(vpmu, VPMU_CONTEXT_SAVE); + on_selected_cpus(cpumask_of(vpmu->last_pcpu), + vpmu_save_force, (void *)v, 1); + vpmu_reset(vpmu, VPMU_CONTEXT_LOADED); + + if ( v != current ) + vcpu_unpause(v); + } + } +} + +/* Process the softirq set by PMU NMI handler */ +static void pmu_softnmi(void) +{ + struct cpu_user_regs *regs; + struct vcpu *v, *sampled = per_cpu(sampled_vcpu, smp_processor_id()); + + if ( (vpmu_mode & XENPMU_MODE_PRIV) || + (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) ) + v = dom0->vcpu[smp_processor_id() % dom0->max_vcpus]; + else + { + if ( is_hvm_domain(sampled->domain) ) + { + vpmu_send_nmi(sampled); + return; + } + v = sampled; + } + + regs = &v->arch.vpmu.xenpmu_data->pmu.r.regs; + if ( has_hvm_container_domain(sampled->domain) ) + { + struct segment_register cs; + + hvm_get_segment_register(sampled, x86_seg_cs, &cs); + regs->cs = cs.attr.fields.dpl; + } + + send_guest_vcpu_virq(v, VIRQ_XENPMU); +} + +int pmu_nmi_interrupt(struct cpu_user_regs *regs, int cpu) +{ + return vpmu_do_interrupt(regs); +} + +static int pvpmu_init(struct domain *d, xen_pmu_params_t *params) +{ + struct vcpu *v; + struct page_info *page; + uint64_t gfn = params->d.val; + static bool_t __read_mostly pvpmu_initted = 0; + + if ( params->vcpu < 0 || params->vcpu >= d->max_vcpus ) + return -EINVAL; + + page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC); + if ( !page ) + return -EINVAL; + + if ( !get_page_type(page, PGT_writable_page) ) + { + put_page(page); + return -EINVAL; + } + + v = d->vcpu[params->vcpu]; + v->arch.vpmu.xenpmu_data = __map_domain_page_global(page); + if ( !v->arch.vpmu.xenpmu_data ) + { + put_page_and_type(page); + return -EINVAL; + } + + if ( !pvpmu_initted ) + { + if (reserve_lapic_nmi() == 0) + set_nmi_callback(pmu_nmi_interrupt); + else + { + printk("Failed to reserve PMU NMI\n"); + put_page(page); + return -EBUSY; + } + open_softirq(PMU_SOFTIRQ, pmu_softnmi); + + pvpmu_initted = 1; + } + + vpmu_initialise(v); + + return 0; +} + +static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params) +{ + struct vcpu *v; + uint64_t mfn; + + if ( params->vcpu < 0 || params->vcpu >= d->max_vcpus ) + return; + + v = d->vcpu[params->vcpu]; + if (v != current) + vcpu_pause(v); + + if ( v->arch.vpmu.xenpmu_data ) + { + mfn = domain_page_map_to_mfn(v->arch.vpmu.xenpmu_data); + if ( mfn_valid(mfn) ) + { + unmap_domain_page_global(v->arch.vpmu.xenpmu_data); + put_page_and_type(mfn_to_page(mfn)); + } + } + vpmu_destroy(v); + + if (v != current) + vcpu_unpause(v); +} + +long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg) +{ + int ret = -EINVAL; + xen_pmu_params_t pmu_params; + + switch ( op ) + { + case XENPMU_mode_set: + if ( !is_control_domain(current->domain) ) + return -EPERM; + + if ( copy_from_guest(&pmu_params, arg, 1) ) + return -EFAULT; + + if ( (pmu_params.d.val & ~(XENPMU_MODE_ON | XENPMU_MODE_PRIV)) || + ((pmu_params.d.val & XENPMU_MODE_ON) && + (pmu_params.d.val & XENPMU_MODE_PRIV)) ) + return -EINVAL; + + vpmu_mode = pmu_params.d.val; + + if ( (vpmu_mode == XENPMU_MODE_OFF) || (vpmu_mode & XENPMU_MODE_PRIV) ) + /* + * After this VPMU context will never be loaded during context + * switch. Because PMU MSR accesses load VPMU context we don't + * allow them when VPMU is off and, for non-provileged domains, + * when we are in privileged mode. (We do want these accesses to + * load VPMU context for control domain in this mode) + */ + vpmu_unload_all(); + + ret = 0; + break; + + case XENPMU_mode_get: + pmu_params.d.val = vpmu_mode; + pmu_params.v.version.maj = XENPMU_VER_MAJ; + pmu_params.v.version.min = XENPMU_VER_MIN; + if ( copy_to_guest(arg, &pmu_params, 1) ) + return -EFAULT; + ret = 0; + break; + + case XENPMU_feature_set: + if ( !is_control_domain(current->domain) ) + return -EPERM; + + if ( copy_from_guest(&pmu_params, arg, 1) ) + return -EFAULT; + + if ( pmu_params.d.val & ~XENPMU_FEATURE_INTEL_BTS ) + return -EINVAL; + + vpmu_features = pmu_params.d.val; + + ret = 0; + break; + + case XENPMU_feature_get: + pmu_params.d.val = vpmu_mode; + if ( copy_to_guest(arg, &pmu_params, 1) ) + return -EFAULT; + ret = 0; + break; + + case XENPMU_init: + if ( copy_from_guest(&pmu_params, arg, 1) ) + return -EFAULT; + ret = pvpmu_init(current->domain, &pmu_params); + break; + + case XENPMU_finish: + if ( copy_from_guest(&pmu_params, arg, 1) ) + return -EFAULT; + pvpmu_finish(current->domain, &pmu_params); + break; + + case XENPMU_lvtpc_set: + if ( current->arch.vpmu.xenpmu_data == NULL ) + return -EINVAL; + vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc); + ret = 0; + break; + case XENPMU_flush: + current->arch.vpmu.xenpmu_data->pmu_flags &= ~PMU_CACHED; + vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc); + vpmu_load(current); + ret = 0; + break; + } + + return ret; +} diff --git a/xen/arch/x86/vpmu_amd.c b/xen/arch/x86/vpmu_amd.c new file mode 100644 index 0000000..32d2882 --- /dev/null +++ b/xen/arch/x86/vpmu_amd.c @@ -0,0 +1,509 @@ +/* + * vpmu.c: PMU virtualization for HVM domain. + * + * Copyright (c) 2010, Advanced Micro Devices, Inc. + * Parts of this code are Copyright (c) 2007, Intel Corporation + * + * Author: Wei Wang + * Tested by: Suravee Suthikulpanit + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MSR_F10H_EVNTSEL_GO_SHIFT 40 +#define MSR_F10H_EVNTSEL_EN_SHIFT 22 +#define MSR_F10H_COUNTER_LENGTH 48 + +#define is_guest_mode(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) +#define is_pmu_enabled(msr) ((msr) & (1ULL << MSR_F10H_EVNTSEL_EN_SHIFT)) +#define set_guest_mode(msr) (msr |= (1ULL << MSR_F10H_EVNTSEL_GO_SHIFT)) +#define is_overflowed(msr) (!((msr) & (1ULL << (MSR_F10H_COUNTER_LENGTH-1)))) + +static unsigned int __read_mostly num_counters; +static const u32 __read_mostly *counters; +static const u32 __read_mostly *ctrls; +static bool_t __read_mostly k7_counters_mirrored; + +#define F10H_NUM_COUNTERS 4 +#define F15H_NUM_COUNTERS 6 +#define AMD_MAX_COUNTERS 6 + +/* PMU Counter MSRs. */ +static const u32 AMD_F10H_COUNTERS[] = { + MSR_K7_PERFCTR0, + MSR_K7_PERFCTR1, + MSR_K7_PERFCTR2, + MSR_K7_PERFCTR3 +}; + +/* PMU Control MSRs. */ +static const u32 AMD_F10H_CTRLS[] = { + MSR_K7_EVNTSEL0, + MSR_K7_EVNTSEL1, + MSR_K7_EVNTSEL2, + MSR_K7_EVNTSEL3 +}; + +static const u32 AMD_F15H_COUNTERS[] = { + MSR_AMD_FAM15H_PERFCTR0, + MSR_AMD_FAM15H_PERFCTR1, + MSR_AMD_FAM15H_PERFCTR2, + MSR_AMD_FAM15H_PERFCTR3, + MSR_AMD_FAM15H_PERFCTR4, + MSR_AMD_FAM15H_PERFCTR5 +}; + +static const u32 AMD_F15H_CTRLS[] = { + MSR_AMD_FAM15H_EVNTSEL0, + MSR_AMD_FAM15H_EVNTSEL1, + MSR_AMD_FAM15H_EVNTSEL2, + MSR_AMD_FAM15H_EVNTSEL3, + MSR_AMD_FAM15H_EVNTSEL4, + MSR_AMD_FAM15H_EVNTSEL5 +}; + +static inline int get_pmu_reg_type(u32 addr) +{ + if ( (addr >= MSR_K7_EVNTSEL0) && (addr <= MSR_K7_EVNTSEL3) ) + return MSR_TYPE_CTRL; + + if ( (addr >= MSR_K7_PERFCTR0) && (addr <= MSR_K7_PERFCTR3) ) + return MSR_TYPE_COUNTER; + + if ( (addr >= MSR_AMD_FAM15H_EVNTSEL0) && + (addr <= MSR_AMD_FAM15H_PERFCTR5 ) ) + { + if (addr & 1) + return MSR_TYPE_COUNTER; + else + return MSR_TYPE_CTRL; + } + + /* unsupported registers */ + return -1; +} + +static inline u32 get_fam15h_addr(u32 addr) +{ + switch ( addr ) + { + case MSR_K7_PERFCTR0: + return MSR_AMD_FAM15H_PERFCTR0; + case MSR_K7_PERFCTR1: + return MSR_AMD_FAM15H_PERFCTR1; + case MSR_K7_PERFCTR2: + return MSR_AMD_FAM15H_PERFCTR2; + case MSR_K7_PERFCTR3: + return MSR_AMD_FAM15H_PERFCTR3; + case MSR_K7_EVNTSEL0: + return MSR_AMD_FAM15H_EVNTSEL0; + case MSR_K7_EVNTSEL1: + return MSR_AMD_FAM15H_EVNTSEL1; + case MSR_K7_EVNTSEL2: + return MSR_AMD_FAM15H_EVNTSEL2; + case MSR_K7_EVNTSEL3: + return MSR_AMD_FAM15H_EVNTSEL3; + default: + break; + } + + return addr; +} + +static void amd_vpmu_set_msr_bitmap(struct vcpu *v) +{ + unsigned int i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_amd_ctxt *ctxt = vpmu->context; + + for ( i = 0; i < num_counters; i++ ) + { + svm_intercept_msr(v, counters[i], MSR_INTERCEPT_NONE); + svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_WRITE); + } + + ctxt->msr_bitmap_set = 1; +} + +static void amd_vpmu_unset_msr_bitmap(struct vcpu *v) +{ + unsigned int i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_amd_ctxt *ctxt = vpmu->context; + + for ( i = 0; i < num_counters; i++ ) + { + svm_intercept_msr(v, counters[i], MSR_INTERCEPT_RW); + svm_intercept_msr(v, ctrls[i], MSR_INTERCEPT_RW); + } + + ctxt->msr_bitmap_set = 0; +} + +/* Must be NMI-safe */ +static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs) +{ + return 1; +} + +static inline void context_load(struct vcpu *v) +{ + unsigned int i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_amd_ctxt *ctxt = vpmu->context; + uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); + uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); + + for ( i = 0; i < num_counters; i++ ) + { + wrmsrl(counters[i], counter_regs[i]); + wrmsrl(ctrls[i], ctrl_regs[i]); + } +} + +/* Must be NMI-safe */ +static void amd_vpmu_load(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_amd_ctxt *ctxt = vpmu->context; + uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); + + vpmu_reset(vpmu, VPMU_FROZEN); + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + { + unsigned int i; + + for ( i = 0; i < num_counters; i++ ) + wrmsrl(ctrls[i], ctrl_regs[i]); + + return; + } + + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + + context_load(v); +} + +static inline void context_save(struct vcpu *v) +{ + unsigned int i; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_amd_ctxt *ctxt = vpmu->context; + uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); + + /* No need to save controls -- they are saved in amd_vpmu_do_wrmsr */ + for ( i = 0; i < num_counters; i++ ) + rdmsrl(counters[i], counter_regs[i]); +} + +static int amd_vpmu_save(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_amd_ctxt *ctx = vpmu->context; + unsigned int i; + + /* + * Stop the counters. If we came here via vpmu_save_force (i.e. + * when VPMU_CONTEXT_SAVE is set) counters are already stopped. + */ + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) + { + vpmu_set(vpmu, VPMU_FROZEN); + + for ( i = 0; i < num_counters; i++ ) + wrmsrl(ctrls[i], 0); + + return 0; + } + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + return 0; + + context_save(v); + + if ( has_hvm_container_domain(v->domain) && + !vpmu_is_set(vpmu, VPMU_RUNNING) && ctx->msr_bitmap_set ) + amd_vpmu_unset_msr_bitmap(v); + + return 1; +} + +static void context_update(unsigned int msr, u64 msr_content) +{ + unsigned int i; + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_amd_ctxt *ctxt = vpmu->context; + uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); + uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); + + if ( k7_counters_mirrored && + ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)) ) + { + msr = get_fam15h_addr(msr); + } + + for ( i = 0; i < num_counters; i++ ) + { + if ( msr == ctrls[i] ) + { + ctrl_regs[i] = msr_content; + return; + } + else if (msr == counters[i] ) + { + counter_regs[i] = msr_content; + return; + } + } +} + +static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) +{ + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + /* For all counters, enable guest only mode for HVM guest */ + if ( has_hvm_container_domain(v->domain) && (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && + !(is_guest_mode(msr_content)) ) + { + set_guest_mode(msr_content); + } + + /* check if the first counter is enabled */ + if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && + is_pmu_enabled(msr_content) && !vpmu_is_set(vpmu, VPMU_RUNNING) ) + { + if ( !acquire_pmu_ownership(PMU_OWNER_HVM) ) + return 1; + vpmu_set(vpmu, VPMU_RUNNING); + + if ( has_hvm_container_domain(v->domain) && + !((struct xen_pmu_amd_ctxt *)vpmu->context)->msr_bitmap_set ) + amd_vpmu_set_msr_bitmap(v); + } + + /* stop saving & restore if guest stops first counter */ + if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) && + (is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) ) + { + vpmu_reset(vpmu, VPMU_RUNNING); + if ( has_hvm_container_domain(v->domain) && + ((struct xen_pmu_amd_ctxt *)vpmu->context)->msr_bitmap_set ) + amd_vpmu_unset_msr_bitmap(v); + release_pmu_ownship(PMU_OWNER_HVM); + } + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) + || vpmu_is_set(vpmu, VPMU_FROZEN) ) + { + context_load(v); + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + vpmu_reset(vpmu, VPMU_FROZEN); + } + + /* Update vpmu context immediately */ + context_update(msr, msr_content); + + /* Write to hw counters */ + wrmsrl(msr, msr_content); + return 1; +} + +static int amd_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) + || vpmu_is_set(vpmu, VPMU_FROZEN) ) + { + context_load(v); + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + vpmu_reset(vpmu, VPMU_FROZEN); + } + + rdmsrl(msr, *msr_content); + + return 1; +} + +static int amd_vpmu_initialise(struct vcpu *v) +{ + struct xen_pmu_amd_ctxt *ctxt; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + uint8_t family = current_cpu_data.x86; + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return 0; + + if ( counters == NULL ) + { + switch ( family ) + { + case 0x15: + num_counters = F15H_NUM_COUNTERS; + counters = AMD_F15H_COUNTERS; + ctrls = AMD_F15H_CTRLS; + k7_counters_mirrored = 1; + break; + case 0x10: + case 0x12: + case 0x14: + case 0x16: + default: + num_counters = F10H_NUM_COUNTERS; + counters = AMD_F10H_COUNTERS; + ctrls = AMD_F10H_CTRLS; + k7_counters_mirrored = 0; + break; + } + } + + if ( has_hvm_container_domain(v->domain) ) + { + ctxt = xzalloc_bytes(sizeof(struct xen_pmu_amd_ctxt) + + sizeof(uint64_t) * AMD_MAX_COUNTERS + + sizeof(uint64_t) * AMD_MAX_COUNTERS); + if ( !ctxt ) + { + gdprintk(XENLOG_WARNING, "Insufficient memory for PMU, " + " PMU feature is unavailable on domain %d vcpu %d.\n", + v->vcpu_id, v->domain->domain_id); + return -ENOMEM; + } + } + else + ctxt = &v->arch.vpmu.xenpmu_data->pmu.c.amd; + + ctxt->counters = sizeof(struct xen_pmu_amd_ctxt); + ctxt->ctrls = ctxt->counters + sizeof(uint64_t) * AMD_MAX_COUNTERS; + + vpmu->context = ctxt; + vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); + return 0; +} + +static void amd_vpmu_destroy(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + if ( has_hvm_container_domain(v->domain) ) + { + if ( ((struct xen_pmu_amd_ctxt *)vpmu->context)->msr_bitmap_set ) + amd_vpmu_unset_msr_bitmap(v); + + xfree(vpmu->context); + } + + vpmu->context = NULL; + vpmu_clear(vpmu); + release_pmu_ownship(PMU_OWNER_HVM); +} + +/* VPMU part of the 'q' keyhandler */ +static void amd_vpmu_dump(const struct vcpu *v) +{ + const struct vpmu_struct *vpmu = vcpu_vpmu(v); + const struct xen_pmu_amd_ctxt *ctxt = vpmu->context; + uint64_t *counter_regs = vpmu_reg_pointer(ctxt, counters); + uint64_t *ctrl_regs = vpmu_reg_pointer(ctxt, ctrls); + unsigned int i; + + printk(" VPMU state: 0x%x ", vpmu->flags); + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + { + printk("\n"); + return; + } + + printk("("); + if ( vpmu_is_set(vpmu, VPMU_PASSIVE_DOMAIN_ALLOCATED) ) + printk("PASSIVE_DOMAIN_ALLOCATED, "); + if ( vpmu_is_set(vpmu, VPMU_FROZEN) ) + printk("FROZEN, "); + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) ) + printk("SAVE, "); + if ( vpmu_is_set(vpmu, VPMU_RUNNING) ) + printk("RUNNING, "); + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + printk("LOADED, "); + printk("ALLOCATED)\n"); + + for ( i = 0; i < num_counters; i++ ) + { + uint64_t ctrl, cntr; + + rdmsrl(ctrls[i], ctrl); + rdmsrl(counters[i], cntr); + printk(" %#x: %#lx (%#lx in HW) %#x: %#lx (%#lx in HW)\n", + ctrls[i], ctrl_regs[i], ctrl, + counters[i], counter_regs[i], cntr); + } +} + +struct arch_vpmu_ops amd_vpmu_ops = { + .do_wrmsr = amd_vpmu_do_wrmsr, + .do_rdmsr = amd_vpmu_do_rdmsr, + .do_interrupt = amd_vpmu_do_interrupt, + .arch_vpmu_destroy = amd_vpmu_destroy, + .arch_vpmu_save = amd_vpmu_save, + .arch_vpmu_load = amd_vpmu_load, + .arch_vpmu_dump = amd_vpmu_dump +}; + +int svm_vpmu_initialise(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + uint8_t family = current_cpu_data.x86; + int ret = 0; + + /* vpmu enabled? */ + if ( vpmu_mode == XENPMU_MODE_OFF ) + return 0; + + switch ( family ) + { + case 0x10: + case 0x12: + case 0x14: + case 0x15: + case 0x16: + ret = amd_vpmu_initialise(v); + if ( !ret ) + vpmu->arch_vpmu_ops = &amd_vpmu_ops; + return ret; + } + + printk("VPMU: Initialization failed. " + "AMD processor family %d has not " + "been supported\n", family); + return -EINVAL; +} + diff --git a/xen/arch/x86/vpmu_intel.c b/xen/arch/x86/vpmu_intel.c new file mode 100644 index 0000000..195511e --- /dev/null +++ b/xen/arch/x86/vpmu_intel.c @@ -0,0 +1,940 @@ +/* + * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain. + * + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Haitao Shan + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID + * instruction. + * cpuid 0xa - Architectural Performance Monitoring Leaf + * Register eax + */ +#define PMU_VERSION_SHIFT 0 /* Version ID */ +#define PMU_VERSION_BITS 8 /* 8 bits 0..7 */ +#define PMU_VERSION_MASK (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT) + +#define PMU_GENERAL_NR_SHIFT 8 /* Number of general pmu registers */ +#define PMU_GENERAL_NR_BITS 8 /* 8 bits 8..15 */ +#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT) + +#define PMU_GENERAL_WIDTH_SHIFT 16 /* Width of general pmu registers */ +#define PMU_GENERAL_WIDTH_BITS 8 /* 8 bits 16..23 */ +#define PMU_GENERAL_WIDTH_MASK (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT) +/* Register edx */ +#define PMU_FIXED_NR_SHIFT 0 /* Number of fixed pmu registers */ +#define PMU_FIXED_NR_BITS 5 /* 5 bits 0..4 */ +#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT) + +#define PMU_FIXED_WIDTH_SHIFT 5 /* Width of fixed pmu registers */ +#define PMU_FIXED_WIDTH_BITS 8 /* 8 bits 5..12 */ +#define PMU_FIXED_WIDTH_MASK (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT) + +/* Alias registers (0x4c1) for full-width writes to PMCs */ +#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0)) +static bool_t __read_mostly full_width_write; + +/* Intel-specific VPMU features */ +#define VPMU_CPU_HAS_DS 0x100 /* Has Debug Store */ +#define VPMU_CPU_HAS_BTS 0x200 /* Has Branch Trace Store */ + +/* + * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed + * counters. 4 bits for every counter. + */ +#define FIXED_CTR_CTRL_BITS 4 +#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1) + +/* Number of general-purpose and fixed performance counters */ +static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt; + +/* + * QUIRK to workaround an issue on various family 6 cpus. + * The issue leads to endless PMC interrupt loops on the processor. + * If the interrupt handler is running and a pmc reaches the value 0, this + * value remains forever and it triggers immediately a new interrupt after + * finishing the handler. + * A workaround is to read all flagged counters and if the value is 0 write + * 1 (or another value != 0) into it. + * There exist no errata and the real cause of this behaviour is unknown. + */ +bool_t __read_mostly is_pmc_quirk; + +static void check_pmc_quirk(void) +{ + if ( current_cpu_data.x86 == 6 ) + is_pmc_quirk = 1; + else + is_pmc_quirk = 0; +} + +static void handle_pmc_quirk(u64 msr_content) +{ + int i; + u64 val; + + if ( !is_pmc_quirk ) + return; + + val = msr_content; + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + if ( val & 0x1 ) + { + u64 cnt; + rdmsrl(MSR_P6_PERFCTR0 + i, cnt); + if ( cnt == 0 ) + wrmsrl(MSR_P6_PERFCTR0 + i, 1); + } + val >>= 1; + } + val = msr_content >> 32; + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + if ( val & 0x1 ) + { + u64 cnt; + rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt); + if ( cnt == 0 ) + wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1); + } + val >>= 1; + } +} + +/* + * Read the number of general counters via CPUID.EAX[0xa].EAX[8..15] + */ +static int core2_get_arch_pmc_count(void) +{ + u32 eax; + + eax = cpuid_eax(0xa); + return ( (eax & PMU_GENERAL_NR_MASK) >> PMU_GENERAL_NR_SHIFT ); +} + +/* + * Read the number of fixed counters via CPUID.EDX[0xa].EDX[0..4] + */ +static int core2_get_fixed_pmc_count(void) +{ + u32 eax; + + eax = cpuid_eax(0xa); + return ( (eax & PMU_FIXED_NR_MASK) >> PMU_FIXED_NR_SHIFT ); +} + +/* edx bits 5-12: Bit width of fixed-function performance counters */ +static int core2_get_bitwidth_fix_count(void) +{ + u32 edx; + + edx = cpuid_edx(0xa); + return ( (edx & PMU_FIXED_WIDTH_MASK) >> PMU_FIXED_WIDTH_SHIFT ); +} + +static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index) +{ + int i; + u32 msr_index_pmc; + + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + if ( msr_index == MSR_CORE_PERF_FIXED_CTR0 + i ) + { + *type = MSR_TYPE_COUNTER; + *index = i; + return 1; + } + } + + if ( (msr_index == MSR_CORE_PERF_FIXED_CTR_CTRL ) || + (msr_index == MSR_IA32_DS_AREA) || + (msr_index == MSR_IA32_PEBS_ENABLE) ) + { + *type = MSR_TYPE_CTRL; + return 1; + } + + if ( (msr_index == MSR_CORE_PERF_GLOBAL_CTRL) || + (msr_index == MSR_CORE_PERF_GLOBAL_STATUS) || + (msr_index == MSR_CORE_PERF_GLOBAL_OVF_CTRL) ) + { + *type = MSR_TYPE_GLOBAL; + return 1; + } + + msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK; + if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) && + (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) ) + { + *type = MSR_TYPE_ARCH_COUNTER; + *index = msr_index_pmc - MSR_IA32_PERFCTR0; + return 1; + } + + if ( (msr_index >= MSR_P6_EVNTSEL0) && + (msr_index < (MSR_P6_EVNTSEL0 + arch_pmc_cnt)) ) + { + *type = MSR_TYPE_ARCH_CTRL; + *index = msr_index - MSR_P6_EVNTSEL0; + return 1; + } + + return 0; +} + +#define msraddr_to_bitpos(x) (((x)&0xffff) + ((x)>>31)*0x2000) +static void core2_vpmu_set_msr_bitmap(unsigned long *msr_bitmap) +{ + int i; + + /* Allow Read/Write PMU Counters MSR Directly. */ + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap); + clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap); + clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), + msr_bitmap + 0x800/BYTES_PER_LONG); + + if ( full_width_write ) + { + clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap); + clear_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + } + + /* Allow Read PMU Non-global Controls Directly. */ + for ( i = 0; i < arch_pmc_cnt; i++ ) + clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0 + i), msr_bitmap); + + clear_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap); + clear_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap); + clear_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap); +} + +static void core2_vpmu_unset_msr_bitmap(unsigned long *msr_bitmap) +{ + int i; + + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), msr_bitmap); + set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR0 + i), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), msr_bitmap); + set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0 + i), + msr_bitmap + 0x800/BYTES_PER_LONG); + + if ( full_width_write ) + { + set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), msr_bitmap); + set_bit(msraddr_to_bitpos(MSR_IA32_A_PERFCTR0 + i), + msr_bitmap + 0x800/BYTES_PER_LONG); + } + } + + for ( i = 0; i < arch_pmc_cnt; i++ ) + set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0 + i), msr_bitmap); + + set_bit(msraddr_to_bitpos(MSR_CORE_PERF_FIXED_CTR_CTRL), msr_bitmap); + set_bit(msraddr_to_bitpos(MSR_IA32_PEBS_ENABLE), msr_bitmap); + set_bit(msraddr_to_bitpos(MSR_IA32_DS_AREA), msr_bitmap); +} + +static inline void __core2_vpmu_save(struct vcpu *v) +{ + int i; + struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context; + uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); + struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = + vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); + + for ( i = 0; i < fixed_pmc_cnt; i++ ) + rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]); + for ( i = 0; i < arch_pmc_cnt; i++ ) + rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter); + + if ( !has_hvm_container_domain(v->domain) ) + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status); +} + +/* Must be NMI-safe */ +static int core2_vpmu_save(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !has_hvm_container_domain(v->domain) ) + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if ( !vpmu_is_set_all(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) ) + return 0; + + __core2_vpmu_save(v); + + /* Unset PMU MSR bitmap to trap lazy load. */ + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && cpu_has_vmx_msr_bitmap + && has_hvm_container_domain(v->domain) ) + core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); + + return 1; +} + +static inline void __core2_vpmu_load(struct vcpu *v) +{ + unsigned int i, pmc_start; + struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context; + uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); + struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = + vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); + + for ( i = 0; i < fixed_pmc_cnt; i++ ) + wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]); + + if ( full_width_write ) + pmc_start = MSR_IA32_A_PERFCTR0; + else + pmc_start = MSR_IA32_PERFCTR0; + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter); + wrmsrl(MSR_P6_EVNTSEL0 + i, xen_pmu_cntr_pair[i].control); + } + + wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl); + wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area); + wrmsrl(MSR_IA32_PEBS_ENABLE, core2_vpmu_cxt->pebs_enable); + + if ( !has_hvm_container_domain(v->domain) ) + { + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl); + core2_vpmu_cxt->global_ovf_ctrl = 0; + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl); + } +} + +static void core2_vpmu_load(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + return; + + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + + __core2_vpmu_load(v); +} + +static int core2_vpmu_alloc_resource(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_intel_ctxt *core2_vpmu_cxt; + + if ( !is_pv_domain(v->domain) ) + { + if ( !acquire_pmu_ownership(PMU_OWNER_HVM) ) + return 0; + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) + goto out_err; + + if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) ) + goto out_err; + vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + core2_vpmu_cxt = xzalloc_bytes(sizeof(struct xen_pmu_intel_ctxt) + + sizeof(uint64_t) * fixed_pmc_cnt + + sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt); + if ( !core2_vpmu_cxt ) + goto out_err; + } + else + { + core2_vpmu_cxt = &v->arch.vpmu.xenpmu_data->pmu.c.intel; + vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); + } + + core2_vpmu_cxt->fixed_counters = sizeof(struct xen_pmu_intel_ctxt); + core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters + + sizeof(uint64_t) * fixed_pmc_cnt; + + vpmu->context = (void *)core2_vpmu_cxt; + + vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED); + + return 1; + +out_err: + vmx_rm_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL); + vmx_rm_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL); + release_pmu_ownship(PMU_OWNER_HVM); + + printk("Failed to allocate VPMU resources for domain %u vcpu %u\n", + v->vcpu_id, v->domain->domain_id); + + return 0; +} + +static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( !is_core2_vpmu_msr(msr_index, type, index) ) + return 0; + + if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) && + !core2_vpmu_alloc_resource(current) ) + return 0; + + /* Do the lazy load staff. */ + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + { + __core2_vpmu_load(current); + vpmu_set(vpmu, VPMU_CONTEXT_LOADED); + if ( cpu_has_vmx_msr_bitmap && has_hvm_container_domain(current->domain) ) + core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap); + } + return 1; +} + +static void inject_trap(struct vcpu *v, unsigned int trapno) +{ + if ( has_hvm_container_domain(v->domain) ) + hvm_inject_hw_exception(trapno, 0); + else + send_guest_trap(v->domain, v->vcpu_id, trapno); +} + +static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content) +{ + u64 global_ctrl, non_global_ctrl; + unsigned pmu_enable = 0; + int i, tmp; + int type = -1, index = -1; + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL; + + if ( !core2_vpmu_msr_common_check(msr, &type, &index) ) + { + /* Special handling for BTS */ + if ( msr == MSR_IA32_DEBUGCTLMSR ) + { + uint64_t supported = IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS | + IA32_DEBUGCTLMSR_BTINT; + + if ( cpu_has(¤t_cpu_data, X86_FEATURE_DSCPL) ) + supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS | + IA32_DEBUGCTLMSR_BTS_OFF_USR; + if ( msr_content & supported ) + { + if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) + return 1; + gdprintk(XENLOG_WARNING, "Debug Store is not supported on this cpu\n"); + inject_trap(v, TRAP_gp_fault); + return 0; + } + } + return 0; + } + + core2_vpmu_cxt = vpmu->context; + switch ( msr ) + { + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + core2_vpmu_cxt->global_status &= ~msr_content; + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); + return 1; + case MSR_CORE_PERF_GLOBAL_STATUS: + gdprintk(XENLOG_INFO, "Can not write readonly MSR: " + "MSR_PERF_GLOBAL_STATUS(0x38E)!\n"); + inject_trap(v, TRAP_gp_fault); + return 1; + case MSR_IA32_PEBS_ENABLE: + if ( msr_content & 1 ) + gdprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, " + "which is not supported.\n"); + core2_vpmu_cxt->pebs_enable = msr_content; + return 1; + case MSR_IA32_DS_AREA: + if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) + { + if ( !is_canonical_address(msr_content) ) + { + gdprintk(XENLOG_WARNING, + "Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n", + msr_content); + inject_trap(v, TRAP_gp_fault); + return 1; + } + core2_vpmu_cxt->ds_area = msr_content; + break; + } + gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n"); + return 1; + case MSR_CORE_PERF_GLOBAL_CTRL: + global_ctrl = msr_content; + for ( i = 0; i < arch_pmc_cnt; i++ ) + { + rdmsrl(MSR_P6_EVNTSEL0+i, non_global_ctrl); + pmu_enable += global_ctrl & (non_global_ctrl >> 22) & 1; + global_ctrl >>= 1; + } + + rdmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, non_global_ctrl); + global_ctrl = msr_content >> 32; + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + pmu_enable += (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 1 : 0); + non_global_ctrl >>= FIXED_CTR_CTRL_BITS; + global_ctrl >>= 1; + } + core2_vpmu_cxt->global_ctrl = msr_content; + break; + case MSR_CORE_PERF_FIXED_CTR_CTRL: + non_global_ctrl = msr_content; + if ( has_hvm_container_domain(v->domain) ) + vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl); + else + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); + global_ctrl >>= 32; + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + pmu_enable += (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 1 : 0); + non_global_ctrl >>= 4; + global_ctrl >>= 1; + } + core2_vpmu_cxt->fixed_ctrl = msr_content; + break; + default: + tmp = msr - MSR_P6_EVNTSEL0; + if ( tmp >= 0 && tmp < arch_pmc_cnt ) + { + struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = + vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); + + if ( has_hvm_container_domain(v->domain) ) + vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl); + else + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); + xen_pmu_cntr_pair[tmp].control = msr_content; + for ( i = 0; i < arch_pmc_cnt && !pmu_enable; i++ ) + pmu_enable += (global_ctrl >> i) & + (xen_pmu_cntr_pair[i].control >> 22) & 1; + } + } + + pmu_enable += (core2_vpmu_cxt->ds_area != 0); + if ( pmu_enable ) + vpmu_set(vpmu, VPMU_RUNNING); + else + vpmu_reset(vpmu, VPMU_RUNNING); + + if ( type != MSR_TYPE_GLOBAL ) + { + u64 mask; + int inject_gp = 0; + switch ( type ) + { + case MSR_TYPE_ARCH_CTRL: /* MSR_P6_EVNTSEL[0,...] */ + mask = ~((1ull << 32) - 1); + if (msr_content & mask) + inject_gp = 1; + break; + case MSR_TYPE_CTRL: /* IA32_FIXED_CTR_CTRL */ + if ( msr == MSR_IA32_DS_AREA ) + break; + /* 4 bits per counter, currently 3 fixed counters implemented. */ + mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1); + if (msr_content & mask) + inject_gp = 1; + break; + case MSR_TYPE_COUNTER: /* IA32_FIXED_CTR[0-2] */ + mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1); + if (msr_content & mask) + inject_gp = 1; + break; + } + + if (inject_gp) + inject_trap(v, TRAP_gp_fault); + else + wrmsrl(msr, msr_content); + } + else + { + if ( has_hvm_container_domain(v->domain) ) + vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); + else + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); + } + + return 1; +} + +static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + int type = -1, index = -1; + struct vcpu *v = current; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL; + + if ( core2_vpmu_msr_common_check(msr, &type, &index) ) + { + core2_vpmu_cxt = vpmu->context; + switch ( msr ) + { + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + *msr_content = 0; + break; + case MSR_CORE_PERF_GLOBAL_STATUS: + *msr_content = core2_vpmu_cxt->global_status; + break; + case MSR_CORE_PERF_GLOBAL_CTRL: + if ( has_hvm_container_domain(v->domain) ) + vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content); + else + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content); + break; + default: + rdmsrl(msr, *msr_content); + } + } + else + { + /* Extension for BTS */ + if ( msr == MSR_IA32_MISC_ENABLE ) + { + if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) ) + *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL; + } + else + return 0; + } + + return 1; +} + +static void core2_vpmu_do_cpuid(unsigned int input, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + if (input == 0x1) + { + struct vpmu_struct *vpmu = vcpu_vpmu(current); + + if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) ) + { + /* Switch on the 'Debug Store' feature in CPUID.EAX[1]:EDX[21] */ + *edx |= cpufeat_mask(X86_FEATURE_DS); + if ( cpu_has(¤t_cpu_data, X86_FEATURE_DTES64) ) + *ecx |= cpufeat_mask(X86_FEATURE_DTES64); + if ( cpu_has(¤t_cpu_data, X86_FEATURE_DSCPL) ) + *ecx |= cpufeat_mask(X86_FEATURE_DSCPL); + } + } +} + +/* Dump vpmu info on console, called in the context of keyhandler 'q'. */ +static void core2_vpmu_dump(const struct vcpu *v) +{ + const struct vpmu_struct *vpmu = vcpu_vpmu(v); + int i; + const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL; + u64 val; + uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters); + struct xen_pmu_cntr_pair *xen_pmu_cntr_pair = + vpmu_reg_pointer(core2_vpmu_cxt, arch_counters); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ) + { + if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) ) + printk(" vPMU loaded\n"); + else + printk(" vPMU allocated\n"); + return; + } + + printk(" vPMU running\n"); + core2_vpmu_cxt = vpmu->context; + + /* Print the contents of the counter and its configuration msr. */ + for ( i = 0; i < arch_pmc_cnt; i++ ) + printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n", + i, xen_pmu_cntr_pair[i].counter, xen_pmu_cntr_pair[i].control); + + /* + * The configuration of the fixed counter is 4 bits each in the + * MSR_CORE_PERF_FIXED_CTR_CTRL. + */ + val = core2_vpmu_cxt->fixed_ctrl; + for ( i = 0; i < fixed_pmc_cnt; i++ ) + { + printk(" fixed_%d: 0x%016lx ctrl: %#lx\n", + i, fixed_counters[i], + val & FIXED_CTR_CTRL_MASK); + val >>= FIXED_CTR_CTRL_BITS; + } +} + +static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs) +{ + struct vcpu *v = current; + u64 msr_content; + struct vpmu_struct *vpmu = vcpu_vpmu(v); + struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content); + if ( msr_content ) + { + if ( is_pmc_quirk ) + handle_pmc_quirk(msr_content); + core2_vpmu_cxt->global_status |= msr_content; + msr_content = 0xC000000700000000 | ((1 << arch_pmc_cnt) - 1); + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content); + } + else + { + /* No PMC overflow but perhaps a Trace Message interrupt. */ + __vmread(GUEST_IA32_DEBUGCTL, &msr_content); + if ( !(msr_content & IA32_DEBUGCTLMSR_TR) ) + return 0; + } + + return 1; +} + +static int core2_vpmu_initialise(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + u64 msr_content; + struct cpuinfo_x86 *c = ¤t_cpu_data; + + if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) ) + goto func_out; + /* Check the 'Debug Store' feature in the CPUID.EAX[1]:EDX[21] */ + if ( cpu_has(c, X86_FEATURE_DS) ) + { + if ( !cpu_has(c, X86_FEATURE_DTES64) ) + { + printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area" + " - Debug Store disabled for d%d:v%d\n", + v->domain->domain_id, v->vcpu_id); + goto func_out; + } + vpmu_set(vpmu, VPMU_CPU_HAS_DS); + rdmsrl(MSR_IA32_MISC_ENABLE, msr_content); + if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL ) + { + /* If BTS_UNAVAIL is set reset the DS feature. */ + vpmu_reset(vpmu, VPMU_CPU_HAS_DS); + printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL" + " - Debug Store disabled for d%d:v%d\n", + v->domain->domain_id, v->vcpu_id); + } + else + { + vpmu_set(vpmu, VPMU_CPU_HAS_BTS); + if ( !cpu_has(c, X86_FEATURE_DSCPL) ) + printk(XENLOG_G_INFO + "vpmu: CPU doesn't support CPL-Qualified BTS\n"); + printk("******************************************************\n"); + printk("** WARNING: Emulation of BTS Feature is switched on **\n"); + printk("** Using this processor feature in a virtualized **\n"); + printk("** environment is not 100%% safe. **\n"); + printk("** Setting the DS buffer address with wrong values **\n"); + printk("** may lead to hypervisor hangs or crashes. **\n"); + printk("** It is NOT recommended for production use! **\n"); + printk("******************************************************\n"); + } + } +func_out: + + arch_pmc_cnt = core2_get_arch_pmc_count(); + fixed_pmc_cnt = core2_get_fixed_pmc_count(); + check_pmc_quirk(); + + /* PV domains can allocate resources immediately */ + if ( !has_hvm_container_domain(v->domain) && !core2_vpmu_alloc_resource(v) ) + return 1; + + return 0; +} + +static void core2_vpmu_destroy(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + + if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) ) + return; + + if ( has_hvm_container_domain(v->domain) ) + { + xfree(vpmu->context); + if ( cpu_has_vmx_msr_bitmap ) + core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap); + } + + release_pmu_ownship(PMU_OWNER_HVM); + vpmu->context = NULL; + vpmu_clear(vpmu); +} + +struct arch_vpmu_ops core2_vpmu_ops = { + .do_wrmsr = core2_vpmu_do_wrmsr, + .do_rdmsr = core2_vpmu_do_rdmsr, + .do_interrupt = core2_vpmu_do_interrupt, + .do_cpuid = core2_vpmu_do_cpuid, + .arch_vpmu_destroy = core2_vpmu_destroy, + .arch_vpmu_save = core2_vpmu_save, + .arch_vpmu_load = core2_vpmu_load, + .arch_vpmu_dump = core2_vpmu_dump +}; + +static void core2_no_vpmu_do_cpuid(unsigned int input, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* + * As in this case the vpmu is not enabled reset some bits in the + * architectural performance monitoring related part. + */ + if ( input == 0xa ) + { + *eax &= ~PMU_VERSION_MASK; + *eax &= ~PMU_GENERAL_NR_MASK; + *eax &= ~PMU_GENERAL_WIDTH_MASK; + + *edx &= ~PMU_FIXED_NR_MASK; + *edx &= ~PMU_FIXED_WIDTH_MASK; + } +} + +/* + * If its a vpmu msr set it to 0. + */ +static int core2_no_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) +{ + int type = -1, index = -1; + if ( !is_core2_vpmu_msr(msr, &type, &index) ) + return 0; + *msr_content = 0; + return 1; +} + +/* + * These functions are used in case vpmu is not enabled. + */ +struct arch_vpmu_ops core2_no_vpmu_ops = { + .do_rdmsr = core2_no_vpmu_do_rdmsr, + .do_cpuid = core2_no_vpmu_do_cpuid, +}; + +int vmx_vpmu_initialise(struct vcpu *v) +{ + struct vpmu_struct *vpmu = vcpu_vpmu(v); + uint8_t family = current_cpu_data.x86; + uint8_t cpu_model = current_cpu_data.x86_model; + int ret = 0; + + vpmu->arch_vpmu_ops = &core2_no_vpmu_ops; + if ( vpmu_mode == XENPMU_MODE_OFF ) + return 0; + + if ( family == 6 ) + { + u64 caps; + + rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps); + full_width_write = (caps >> 13) & 1; + + switch ( cpu_model ) + { + /* Core2: */ + case 0x0f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + case 0x16: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ + case 0x17: /* 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ + case 0x1d: /* six-core 45 nm xeon "Dunnington" */ + + case 0x2a: /* SandyBridge */ + case 0x2d: /* SandyBridge, "Romley-EP" */ + + /* Nehalem: */ + case 0x1a: /* 45 nm nehalem, "Bloomfield" */ + case 0x1e: /* 45 nm nehalem, "Lynnfield", "Clarksfield", "Jasper Forest" */ + case 0x2e: /* 45 nm nehalem-ex, "Beckton" */ + + /* Westmere: */ + case 0x25: /* 32 nm nehalem, "Clarkdale", "Arrandale" */ + case 0x2c: /* 32 nm nehalem, "Gulftown", "Westmere-EP" */ + case 0x27: /* 32 nm Westmere-EX */ + + case 0x3a: /* IvyBridge */ + case 0x3e: /* IvyBridge EP */ + + /* Haswell: */ + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: + ret = core2_vpmu_initialise(v); + if ( !ret ) + vpmu->arch_vpmu_ops = &core2_vpmu_ops; + return ret; + } + } + + printk("VPMU: Initialization failed. " + "Intel processor family %d model %d has not " + "been supported\n", family, cpu_model); + return -EINVAL; +} + diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h index ed81cfb..d27df39 100644 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h @@ -20,7 +20,7 @@ #define __ASM_X86_HVM_VMX_VMCS_H__ #include -#include +#include #include extern void vmcs_dump_vcpu(struct vcpu *v); diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h deleted file mode 100644 index 0f3de14..0000000 --- a/xen/include/asm-x86/hvm/vpmu.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * vpmu.h: PMU virtualization for HVM domain. - * - * Copyright (c) 2007, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Author: Haitao Shan - */ - -#ifndef __ASM_X86_HVM_VPMU_H_ -#define __ASM_X86_HVM_VPMU_H_ - -#include - -#define vcpu_vpmu(vcpu) (&(vcpu)->arch.vpmu) -#define vpmu_vcpu(vpmu) container_of(vpmu, struct vcpu, arch.vpmu) - -#define MSR_TYPE_COUNTER 0 -#define MSR_TYPE_CTRL 1 -#define MSR_TYPE_GLOBAL 2 -#define MSR_TYPE_ARCH_COUNTER 3 -#define MSR_TYPE_ARCH_CTRL 4 - -/* Start of PMU register bank */ -#define vpmu_reg_pointer(ctxt, offset) ((void *)((uintptr_t)ctxt + \ - (uintptr_t)ctxt->offset)) - -/* Arch specific operations shared by all vpmus */ -struct arch_vpmu_ops { - int (*do_wrmsr)(unsigned int msr, uint64_t msr_content); - int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content); - int (*do_interrupt)(struct cpu_user_regs *regs); - void (*do_cpuid)(unsigned int input, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx); - void (*arch_vpmu_destroy)(struct vcpu *v); - int (*arch_vpmu_save)(struct vcpu *v); - void (*arch_vpmu_load)(struct vcpu *v); - void (*arch_vpmu_dump)(const struct vcpu *); -}; - -int vmx_vpmu_initialise(struct vcpu *); -int svm_vpmu_initialise(struct vcpu *); - -struct vpmu_struct { - u32 flags; - u32 last_pcpu; - u32 hw_lapic_lvtpc; - void *context; - struct arch_vpmu_ops *arch_vpmu_ops; - xen_pmu_data_t *xenpmu_data; -}; - -/* VPMU states */ -#define VPMU_CONTEXT_ALLOCATED 0x1 -#define VPMU_CONTEXT_LOADED 0x2 -#define VPMU_RUNNING 0x4 -#define VPMU_CONTEXT_SAVE 0x8 /* Force context save */ -#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */ -#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20 - -#define vpmu_set(_vpmu, _x) ((_vpmu)->flags |= (_x)) -#define vpmu_reset(_vpmu, _x) ((_vpmu)->flags &= ~(_x)) -#define vpmu_is_set(_vpmu, _x) ((_vpmu)->flags & (_x)) -#define vpmu_is_set_all(_vpmu, _x) (((_vpmu)->flags & (_x)) == (_x)) -#define vpmu_clear(_vpmu) ((_vpmu)->flags = 0) - -void vpmu_lvtpc_update(uint32_t val); -int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content); -int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content); -int vpmu_do_interrupt(struct cpu_user_regs *regs); -void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx); -void vpmu_initialise(struct vcpu *v); -void vpmu_destroy(struct vcpu *v); -void vpmu_save(struct vcpu *v); -void vpmu_load(struct vcpu *v); -void vpmu_dump(struct vcpu *v); - -extern int acquire_pmu_ownership(int pmu_ownership); -extern void release_pmu_ownership(int pmu_ownership); - -extern uint64_t vpmu_mode; -extern uint64_t vpmu_features; - -#endif /* __ASM_X86_HVM_VPMU_H_*/ - diff --git a/xen/include/asm-x86/vpmu.h b/xen/include/asm-x86/vpmu.h new file mode 100644 index 0000000..0f3de14 --- /dev/null +++ b/xen/include/asm-x86/vpmu.h @@ -0,0 +1,99 @@ +/* + * vpmu.h: PMU virtualization for HVM domain. + * + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Haitao Shan + */ + +#ifndef __ASM_X86_HVM_VPMU_H_ +#define __ASM_X86_HVM_VPMU_H_ + +#include + +#define vcpu_vpmu(vcpu) (&(vcpu)->arch.vpmu) +#define vpmu_vcpu(vpmu) container_of(vpmu, struct vcpu, arch.vpmu) + +#define MSR_TYPE_COUNTER 0 +#define MSR_TYPE_CTRL 1 +#define MSR_TYPE_GLOBAL 2 +#define MSR_TYPE_ARCH_COUNTER 3 +#define MSR_TYPE_ARCH_CTRL 4 + +/* Start of PMU register bank */ +#define vpmu_reg_pointer(ctxt, offset) ((void *)((uintptr_t)ctxt + \ + (uintptr_t)ctxt->offset)) + +/* Arch specific operations shared by all vpmus */ +struct arch_vpmu_ops { + int (*do_wrmsr)(unsigned int msr, uint64_t msr_content); + int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content); + int (*do_interrupt)(struct cpu_user_regs *regs); + void (*do_cpuid)(unsigned int input, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); + void (*arch_vpmu_destroy)(struct vcpu *v); + int (*arch_vpmu_save)(struct vcpu *v); + void (*arch_vpmu_load)(struct vcpu *v); + void (*arch_vpmu_dump)(const struct vcpu *); +}; + +int vmx_vpmu_initialise(struct vcpu *); +int svm_vpmu_initialise(struct vcpu *); + +struct vpmu_struct { + u32 flags; + u32 last_pcpu; + u32 hw_lapic_lvtpc; + void *context; + struct arch_vpmu_ops *arch_vpmu_ops; + xen_pmu_data_t *xenpmu_data; +}; + +/* VPMU states */ +#define VPMU_CONTEXT_ALLOCATED 0x1 +#define VPMU_CONTEXT_LOADED 0x2 +#define VPMU_RUNNING 0x4 +#define VPMU_CONTEXT_SAVE 0x8 /* Force context save */ +#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */ +#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20 + +#define vpmu_set(_vpmu, _x) ((_vpmu)->flags |= (_x)) +#define vpmu_reset(_vpmu, _x) ((_vpmu)->flags &= ~(_x)) +#define vpmu_is_set(_vpmu, _x) ((_vpmu)->flags & (_x)) +#define vpmu_is_set_all(_vpmu, _x) (((_vpmu)->flags & (_x)) == (_x)) +#define vpmu_clear(_vpmu) ((_vpmu)->flags = 0) + +void vpmu_lvtpc_update(uint32_t val); +int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content); +int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content); +int vpmu_do_interrupt(struct cpu_user_regs *regs); +void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx); +void vpmu_initialise(struct vcpu *v); +void vpmu_destroy(struct vcpu *v); +void vpmu_save(struct vcpu *v); +void vpmu_load(struct vcpu *v); +void vpmu_dump(struct vcpu *v); + +extern int acquire_pmu_ownership(int pmu_ownership); +extern void release_pmu_ownership(int pmu_ownership); + +extern uint64_t vpmu_mode; +extern uint64_t vpmu_features; + +#endif /* __ASM_X86_HVM_VPMU_H_*/ + -- 1.8.3.1