[PATCH] Enable Core 2 Duo Performance Counters in HVM guest

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] Enable Core 2 Duo Performance Counters in HVM guest
@ 2007-12-11 10:06 Shan, Haitao
  2007-12-11 10:37 ` Keir Fraser
  0 siblings, 1 reply; 14+ messages in thread
From: Shan, Haitao @ 2007-12-11 10:06 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, Jiang, Yunhong


[-- Attachment #1.1: Type: text/plain, Size: 709 bytes --]

Hi, Keir,

Currently, HVM guests do not have access to performance counters. So it
is not possible to use performance analyzer software such as vtune in
HVM guest to analyze programme performance. Other usage of performance
counters , for example, the NMI watchdog, won't function either.
This patch will enable performance counters in HVM guest. Currently,
only Core 2 Duo is implemented. Tests are carried out using Intel Vtune
Performance Analyzer in Windows XP Professional with 2 vcpus.
There are some notes at the beginning of the patch, describing
implementation choices in this patch.

Signed-off-by:    Haitao Shan <haitao.shan@intel.com>
 <<pmu.patch>> 

Best Regards
Haitao Shan


[-- Attachment #1.2: Type: text/html, Size: 1531 bytes --]

[-- Attachment #2: pmu.patch --]
[-- Type: application/octet-stream, Size: 37048 bytes --]

Description of the implementation of the patch:
1. No matter what type of interrupt guest configed to use, the underlyzing
   hardware will always use normal interrupt.
2. We don't use VMX's HW msr switching capability to switch all the performance
   MSRs. Instead, only MSR_PERF_GLOBAL_CTRL is used. All the other MSRs are
   using software approach to save and restore.
3. When PMC is running, we use "eager save, eager load"; Otherwise, we use
   "eager save, lazy load".
4. No DTS(Debug trace store) is supported.
diff -r 4054cd60895b xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/arch/x86/apic.c	Tue Dec 11 23:47:06 2007 +0800
@@ -93,6 +93,9 @@ void __init apic_intr_init(void)
     /* IPI vectors for APIC spurious and error interrupts */
     set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
     set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+    /* Performance Counters Interrupt */
+    set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt);
 
     /* thermal monitor LVT interrupt */
 #ifdef CONFIG_X86_MCE_P4THERMAL
@@ -1227,6 +1230,16 @@ fastcall void smp_error_interrupt(struct
 }
 
 /*
+ * This interrupt handles performance counters interrupt
+  */
+
+fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs)
+{
+    ack_APIC_irq();
+    hvm_do_pmu_interrupt(regs);
+}
+
+/*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
diff -r 4054cd60895b xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c	Tue Dec 11 23:47:06 2007 +0800
@@ -867,6 +867,11 @@ static int svm_event_pending(struct vcpu
     return vmcb->eventinj.fields.v;
 }
 
+static int svm_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return 0;
+}
+
 static struct hvm_function_table svm_function_table = {
     .name                 = "SVM",
     .cpu_down             = svm_cpu_down,
@@ -890,7 +895,8 @@ static struct hvm_function_table svm_fun
     .set_tsc_offset       = svm_set_tsc_offset,
     .inject_exception     = svm_inject_exception,
     .init_hypercall_page  = svm_init_hypercall_page,
-    .event_pending        = svm_event_pending
+    .event_pending        = svm_event_pending,
+    .do_pmu_interrupt     = svm_do_pmu_interrupt
 };
 
 int start_svm(struct cpuinfo_x86 *c)
diff -r 4054cd60895b xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/arch/x86/hvm/vlapic.c	Tue Dec 11 23:47:06 2007 +0800
@@ -35,6 +35,8 @@
 #include <asm/hvm/vmx/vmx.h>
 #include <public/hvm/ioreq.h>
 #include <public/hvm/params.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vpmu.h>
 
 #define VLAPIC_VERSION                  0x00050014
 #define VLAPIC_LVT_NUM                  6
@@ -540,6 +542,7 @@ static void vlapic_write(struct vcpu *v,
                          unsigned long len, unsigned long val)
 {
     struct vlapic *vlapic = vcpu_vlapic(v);
+    struct vpmu_struct *vpmu = NULL;
     unsigned int offset = address - vlapic_base_address(vlapic);
 
     if ( offset != 0xb0 )
@@ -649,7 +652,6 @@ static void vlapic_write(struct vcpu *v,
     case APIC_LVTT:         /* LVT Timer Reg */
         vlapic->pt.irq = val & APIC_VECTOR_MASK;
     case APIC_LVTTHMR:      /* LVT Thermal Monitor */
-    case APIC_LVTPC:        /* LVT Performance Counter */
     case APIC_LVT0:         /* LVT LINT0 Reg */
     case APIC_LVT1:         /* LVT Lint1 Reg */
     case APIC_LVTERR:       /* LVT Error Reg */
@@ -659,6 +661,29 @@ static void vlapic_write(struct vcpu *v,
         vlapic_set_reg(vlapic, offset, val);
         break;
 
+	case APIC_LVTPC:		/* LVT Performance Counter */
+    {
+        unsigned int vector = 0;
+        vpmu = vcpu_vpmu(current);
+
+        if ( vlapic_sw_disabled(vlapic) )
+            val |= APIC_LVT_MASKED;
+        val &= vlapic_lvt_mask[(offset - APIC_LVTT) >> 4];
+        vlapic_set_reg(vlapic, offset, val);
+		apic_write_around(APIC_LVTPC,
+                ((val & ~APIC_VECTOR_MASK)|(PMU_APIC_VECTOR))& ~0x700);
+
+		switch (GET_APIC_DELIVERY_MODE(val)) {
+		case APIC_MODE_FIXED:
+			vector = val & APIC_VECTOR_MASK;
+            vpmu->int_vec = vector;
+            break;
+        case APIC_MODE_NMI:
+            vpmu->int_vec = 0;
+            break;
+        }
+        break;
+	}
     case APIC_TMICT:
     {
         uint64_t period = (uint64_t)APIC_BUS_CYCLE_NS *
@@ -989,3 +1014,12 @@ int is_lvtt_enabled(struct vcpu *v)
 
     return 1;
 }
+
+int is_lvtpc_enabled(struct vcpu *v)
+{
+    if ( unlikely(!vlapic_enabled(vcpu_vlapic(v))) ||
+            !vlapic_lvt_enabled(vcpu_vlapic(v), APIC_LVTPC))
+        return 0;
+
+    return 1;
+}
diff -r 4054cd60895b xen/arch/x86/hvm/vmx/Makefile
--- a/xen/arch/x86/hvm/vmx/Makefile	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/Makefile	Tue Dec 11 23:47:06 2007 +0800
@@ -9,3 +9,5 @@ endif
 endif
 obj-y += vmcs.o
 obj-y += vmx.o
+obj-y += vpmu.o
+obj-y += vpmu_core.o
diff -r 4054cd60895b xen/arch/x86/hvm/vmx/vmcs.c
--- a/xen/arch/x86/hvm/vmx/vmcs.c	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/vmcs.c	Tue Dec 11 23:47:06 2007 +0800
@@ -31,6 +31,7 @@
 #include <asm/hvm/support.h>
 #include <asm/hvm/vmx/vmx.h>
 #include <asm/hvm/vmx/vmcs.h>
+#include <asm/hvm/vmx/vpmu.h>
 #include <asm/flushtlb.h>
 #include <xen/event.h>
 #include <xen/kernel.h>
diff -r 4054cd60895b xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue Dec 11 23:47:06 2007 +0800
@@ -90,6 +90,8 @@ static int vmx_vcpu_initialise(struct vc
         return rc;
     }
 
+	vpmu_initialise(v);
+
     vmx_install_vlapic_mapping(v);
 
 #ifndef VMXASSIST
@@ -104,6 +106,7 @@ static void vmx_vcpu_destroy(struct vcpu
 static void vmx_vcpu_destroy(struct vcpu *v)
 {
     vmx_destroy_vmcs(v);
+    vpmu_destroy(v);
 }
 
 #ifdef __x86_64__
@@ -742,6 +745,7 @@ static void vmx_ctxt_switch_from(struct 
     vmx_save_guest_msrs(v);
     vmx_restore_host_msrs();
     vmx_save_dr(v);
+    vpmu_save(v);
 }
 
 static void vmx_ctxt_switch_to(struct vcpu *v)
@@ -752,6 +756,7 @@ static void vmx_ctxt_switch_to(struct vc
 
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
+    vpmu_load(v);
 }
 
 static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg)
@@ -1129,6 +1134,11 @@ static int vmx_event_pending(struct vcpu
 {
     ASSERT(v == current);
     return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
+}
+
+static int vmx_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return vpmu_do_interrupt(regs);
 }
 
 static struct hvm_function_table vmx_function_table = {
@@ -1154,6 +1164,7 @@ static struct hvm_function_table vmx_fun
     .inject_exception     = vmx_inject_exception,
     .init_hypercall_page  = vmx_init_hypercall_page,
     .event_pending        = vmx_event_pending,
+    .do_pmu_interrupt     = vmx_do_pmu_interrupt,
     .cpu_up               = vmx_cpu_up,
     .cpu_down             = vmx_cpu_down,
 };
@@ -1312,7 +1323,6 @@ void vmx_cpuid_intercept(
 
     case 0x00000006:
     case 0x00000009:
-    case 0x0000000A:
         *eax = *ebx = *ecx = *edx = 0;
         break;
 
@@ -2395,7 +2405,15 @@ static int vmx_do_msr_read(struct cpu_us
         /* No point in letting the guest see real MCEs */
         msr_content = 0;
         break;
+    case MSR_IA32_MISC_ENABLE:
+        rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
+        /* Debug Trace Store is not supported. */
+        msr_content |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
+                       MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
+        break;
     default:
+        if ( vpmu_do_rdmsr(regs) )
+            goto done;
         switch ( long_mode_do_msr_read(regs) )
         {
             case HNDL_unhandled:
@@ -2602,6 +2620,8 @@ static int vmx_do_msr_write(struct cpu_u
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
+        if ( vpmu_do_wrmsr(regs) )
+            return 1;
         switch ( long_mode_do_msr_write(regs) )
         {
             case HNDL_unhandled:
@@ -2651,6 +2671,7 @@ static void vmx_do_extint(struct cpu_use
     fastcall void smp_call_function_interrupt(void);
     fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
     fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
+    fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs);
 #ifdef CONFIG_X86_MCE_P4THERMAL
     fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
 #endif
@@ -2680,6 +2701,9 @@ static void vmx_do_extint(struct cpu_use
         break;
     case ERROR_APIC_VECTOR:
         smp_error_interrupt(regs);
+        break;
+    case PMU_APIC_VECTOR:
+        smp_pmu_apic_interrupt(regs);
         break;
 #ifdef CONFIG_X86_MCE_P4THERMAL
     case THERMAL_APIC_VECTOR:
diff -r 4054cd60895b xen/arch/x86/hvm/vmx/vpmu.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vpmu.c	Tue Dec 11 23:47:06 2007 +0800
@@ -0,0 +1,101 @@
+/*
+ * vpmu.c: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/percpu.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <public/sched.h>
+#include <public/hvm/save.h>
+#include <asm/hvm/vmx/vpmu.h>
+
+int inline vpmu_do_wrmsr(struct cpu_user_regs *regs) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu )
+        return vpmu->arch_vpmu->do_wrmsr(regs);
+    return 0;
+}
+
+int inline vpmu_do_rdmsr(struct cpu_user_regs *regs) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu )
+        return vpmu->arch_vpmu->do_rdmsr(regs);
+    return 0;
+}
+
+int inline vpmu_do_interrupt(struct cpu_user_regs *regs) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu )
+        return vpmu->arch_vpmu->do_interrupt(regs);
+    return 0;
+}
+
+void vpmu_save(void *info) {
+    struct vcpu *v = info;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu )
+        vpmu->arch_vpmu->arch_vpmu_save(v);
+}
+
+void vpmu_load(struct vcpu *v) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu )
+        vpmu->arch_vpmu->arch_vpmu_load(v);
+}
+
+extern struct arch_vpmu_struct core_vpmu;
+void inline vpmu_initialise(struct vcpu *v) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    __u8 cpu_model = current_cpu_data.x86_model;
+
+    switch (cpu_model)
+    {
+    case 15:
+    case 23:
+        vpmu->arch_vpmu = &core_vpmu;
+        dprintk(XENLOG_INFO, "Core 2 duo CPU detected for guest PMU usage.\n");
+        break;
+    default:
+        dprintk(XENLOG_WARNING, "Unsupport CPU model for guest PMU usage.\n");
+        return;
+    }
+
+    vpmu->int_vec = 0;
+    vpmu->arch_vpmu->arch_vpmu_initialise(v);
+}
+
+void inline vpmu_destroy(struct vcpu *v) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu )
+        vpmu->arch_vpmu->arch_vpmu_destroy(v);
+}
+
diff -r 4054cd60895b xen/arch/x86/hvm/vmx/vpmu_core.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vpmu_core.c	Tue Dec 11 23:47:06 2007 +0800
@@ -0,0 +1,470 @@
+/*
+ * vpmu_core.c: CORE Arch specific PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/system.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/msr-index.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <public/sched.h>
+#include <public/hvm/save.h>
+#include <asm/hvm/vmx/vpmu.h>
+#include <asm/hvm/vmx/vpmu_core.h>
+
+static int arch_pmc_cnt = 0;
+
+static int core_get_pmc_count(void)
+{
+    u32 eax, ebx, ecx, edx;
+
+    if ( arch_pmc_cnt )
+        return arch_pmc_cnt;
+
+    cpuid(0xa, &eax, &ebx, &ecx, &edx);
+    return arch_pmc_cnt = (eax & 0xff00) >> 8;
+}
+
+static int is_core_vpmu_msr(u32 msr_index, int *type, int *index)
+{
+    int i;
+
+    for (i=0; i < core_counters.num; i++)
+        if (core_counters.msr[i] == msr_index)
+        {
+            *type = MSR_TYPE_COUNTER;
+            *index = i;
+            return 1;
+        }
+    for (i=0; i < core_ctrls.num; i++)
+        if (core_ctrls.msr[i] == msr_index)
+        {
+            *type = MSR_TYPE_CTRL;
+            *index = i;
+            return 1;
+        }
+    if ( msr_index == MSR_CORE_PERF_GLOBAL_CTRL ||
+         msr_index == MSR_CORE_PERF_GLOBAL_STATUS ||
+         msr_index == MSR_CORE_PERF_GLOBAL_OVF_CTRL )
+    {
+        *type = MSR_TYPE_GLOBAL;
+        return 1;
+    }
+
+    if ( msr_index >= MSR_IA32_PMC_BASE &&
+         msr_index < MSR_IA32_PMC_BASE + core_get_pmc_count() )
+    {
+        *type = MSR_TYPE_ARCH_COUNTER;
+        *index = msr_index - MSR_IA32_PMC_BASE;
+        return 1;
+    }
+    if ( msr_index >= MSR_PERFEVTSEL_BASE &&
+         msr_index < MSR_PERFEVTSEL_BASE + core_get_pmc_count() )
+    {
+        *type = MSR_TYPE_ARCH_CTRL;
+        *index = msr_index - MSR_PERFEVTSEL_BASE;
+        return 1;
+    }
+    return 0;
+}
+
+static void core_vpmu_set_msr_bitmap(char *msr_bitmap) {
+    int i;
+
+    /* Allow Read/Write PMU Counters MSR Directly. */
+    for (i=0; i < core_counters.num; i++)
+    {
+        clear_bit(msraddr_to_bitpos(core_counters.msr[i]), msr_bitmap);
+        clear_bit(msraddr_to_bitpos(core_counters.msr[i]), msr_bitmap + 0x800);
+    }
+    for (i=0; i < core_get_pmc_count(); i++)
+    {
+        clear_bit(msraddr_to_bitpos(MSR_IA32_PMC_BASE+i), msr_bitmap);
+        clear_bit(msraddr_to_bitpos(MSR_IA32_PMC_BASE+i), msr_bitmap + 0x800);
+    }
+    /* Allow Read PMU Non-global controls Directly. */
+    for (i=0; i < core_ctrls.num; i++)
+        clear_bit(msraddr_to_bitpos(core_ctrls.msr[i]), msr_bitmap);
+    for (i=0; i < core_get_pmc_count(); i++)
+        clear_bit(msraddr_to_bitpos(MSR_PERFEVTSEL_BASE+i), msr_bitmap);
+}
+
+static void core_vpmu_unset_msr_bitmap(char *msr_bitmap) {
+    int i;
+
+    for (i=0; i < core_counters.num; i++)
+    {
+        set_bit(msraddr_to_bitpos(core_counters.msr[i]), msr_bitmap);
+        set_bit(msraddr_to_bitpos(core_counters.msr[i]), msr_bitmap + 0x800);
+    }
+    for (i=0; i < core_get_pmc_count(); i++)
+    {
+        set_bit(msraddr_to_bitpos(MSR_IA32_PMC_BASE+i), msr_bitmap);
+        set_bit(msraddr_to_bitpos(MSR_IA32_PMC_BASE+i), msr_bitmap + 0x800);
+    }
+    /* Allow Read PMU Non-global controls Directly. */
+    for (i=0; i < core_ctrls.num; i++)
+        set_bit(msraddr_to_bitpos(core_ctrls.msr[i]), msr_bitmap);
+    for (i=0; i < core_get_pmc_count(); i++)
+        set_bit(msraddr_to_bitpos(MSR_PERFEVTSEL_BASE+i), msr_bitmap);
+}
+
+static inline void __core_vpmu_save(struct vcpu *v)
+{
+    int i;
+    struct core_vpmu_context *core_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    for (i=0; i < core_counters.num; i++)
+        rdmsrl(core_counters.msr[i], core_vpmu_cxt->counters[i]);
+    for (i=0; i < core_get_pmc_count(); i++)
+        rdmsrl(MSR_IA32_PMC_BASE+i, core_vpmu_cxt->arch_msr_pair[i].counter);
+    core_vpmu_cxt->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
+}
+
+static void core_vpmu_save(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( !((vpmu->flags & VPMU_CONTEXT_ALLOCATED) &&
+           (vpmu->flags & VPMU_CONTEXT_LOADED)) )
+        return;
+
+    __core_vpmu_save(v);
+
+    /* Unset PMU MSR bitmap to trap lazy load. */
+    if ( !(vpmu->flags & VPMU_RUNNING) && cpu_has_vmx_msr_bitmap )
+        core_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+
+    vpmu->flags &= ~VPMU_CONTEXT_LOADED;
+    return;
+}
+
+static inline void __core_vpmu_load(struct vcpu *v)
+{
+    int i;
+    struct core_vpmu_context *core_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    for (i=0; i < core_counters.num; i++)
+        wrmsrl(core_counters.msr[i], core_vpmu_cxt->counters[i]);
+    for (i=0; i < core_get_pmc_count(); i++)
+        wrmsrl(MSR_IA32_PMC_BASE+i, core_vpmu_cxt->arch_msr_pair[i].counter);
+
+    for (i=0; i < core_ctrls.num; i++)
+        wrmsrl(core_ctrls.msr[i], core_vpmu_cxt->ctrls[i]);
+    for (i=0; i < core_get_pmc_count(); i++)
+        wrmsrl(MSR_PERFEVTSEL_BASE+i, core_vpmu_cxt->arch_msr_pair[i].control);
+    vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
+                        core_vpmu_cxt->global_ctrl);
+
+    apic_write_around(APIC_LVTPC, core_vpmu_cxt->hw_lapic_lvtpc);
+}
+
+static void core_vpmu_load(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    /* Only when PMU is counting, we load PMU context immediately. */
+    if ( !((vpmu->flags & VPMU_CONTEXT_ALLOCATED) &&
+           (vpmu->flags & VPMU_RUNNING)) )
+        return;
+    __core_vpmu_load(v);
+    vpmu->flags |= VPMU_CONTEXT_LOADED;
+}
+
+static int core_vpmu_alloc_resource(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core_vpmu_context *core_vpmu_cxt;
+    struct core_pmu_enable *pmu_enable;
+
+    if ( vmx_add_host_load_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
+        return 0;
+
+    if ( vmx_add_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
+        return 0;
+
+    pmu_enable = xmalloc_bytes(sizeof(struct core_pmu_enable) +
+                 (core_get_pmc_count()-1)*sizeof(char));
+    if (!pmu_enable)
+        goto out1;
+    memset(pmu_enable, 0, sizeof(struct core_pmu_enable) +
+                 (core_get_pmc_count()-1)*sizeof(char));
+
+    core_vpmu_cxt = xmalloc_bytes(sizeof(struct core_vpmu_context) +
+                    (core_get_pmc_count()-1)*sizeof(struct arch_msr_pair));
+    if (!core_vpmu_cxt)
+        goto out2;
+    memset(core_vpmu_cxt, 0, sizeof(struct core_vpmu_context) +
+                    (core_get_pmc_count()-1)*sizeof(struct arch_msr_pair));
+    core_vpmu_cxt->pmu_enable = pmu_enable;
+    vpmu->context = (void *)core_vpmu_cxt;
+
+    return 1;
+ out2:
+    xfree(pmu_enable);
+ out1:
+    dprintk(XENLOG_WARNING, "Insufficient memory for PMU, PMU feature is \
+            unavailable on domain %d vcpu %d.\n",
+            v->vcpu_id, v->domain->domain_id);
+    return 0;
+}
+
+static void core_vpmu_save_msr_context(struct vcpu *v, int type,
+                                       int index, u64 msr_data)
+{
+    struct core_vpmu_context *core_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    switch (type) {
+    case MSR_TYPE_CTRL:
+        core_vpmu_cxt->ctrls[index] = msr_data;
+        break;
+    case MSR_TYPE_ARCH_CTRL:
+        core_vpmu_cxt->arch_msr_pair[index].control = msr_data;
+        break;
+    case MSR_TYPE_GLOBAL:
+        core_vpmu_cxt->global_ctrl = msr_data;
+    }
+}
+
+static int core_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( !is_core_vpmu_msr(msr_index, type, index) )
+        return 0;
+
+    if ( unlikely(!(vpmu->flags & VPMU_CONTEXT_ALLOCATED)) &&
+         !core_vpmu_alloc_resource(current) )
+        return 0;
+    vpmu->flags |= VPMU_CONTEXT_ALLOCATED;
+
+    /* Do the lazy load staff. */
+    if ( !(vpmu->flags & VPMU_CONTEXT_LOADED) )
+    {
+        __core_vpmu_load(current);
+        vpmu->flags |= VPMU_CONTEXT_LOADED;
+        if ( cpu_has_vmx_msr_bitmap )
+            core_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap);
+    }
+    return 1;
+}
+
+static int core_vpmu_do_wrmsr(struct cpu_user_regs *regs) {
+    u32 ecx = regs->ecx;
+    u64 msr_content;
+    int type = -1, index = -1;
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core_vpmu_context *core_vpmu_cxt = NULL;
+
+    if ( !core_vpmu_msr_common_check(ecx, &type, &index) )
+        return 0;
+
+    msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
+    core_vpmu_cxt = vpmu->context;
+    switch (ecx) {
+        /* PMU Control MSRs */
+        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+            core_vpmu_cxt->global_ovf_status &= ~msr_content;
+            return 1;
+        case MSR_CORE_PERF_GLOBAL_STATUS:
+            dprintk(XENLOG_INFO, "Can not write readonly MSR: \
+                                MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
+            vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
+            return 1;
+        case MSR_CORE_IA32_PEBS_ENABLE:
+            if (msr_content & MSR_CORE_IA32_PEBS_ENABLE_EN)
+                dprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, \
+                        which is not supported.\n");
+            return 1;
+        case MSR_CORE_IA32_DS_AREA:
+            dprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
+            return 1;
+        case MSR_CORE_PERF_GLOBAL_CTRL:
+        {
+            int i;
+            u64 non_global_ctrl;
+            u32 global_ctrl = msr_content;
+
+            for ( i=0; i<core_get_pmc_count(); i++)
+            {
+                rdmsrl(MSR_PERFEVTSEL_BASE+i, non_global_ctrl);
+                core_vpmu_cxt->pmu_enable->arch_pmc_enable[i] =
+                        (global_ctrl & 1) &
+                        ((non_global_ctrl + (1<<22))>>22);
+                global_ctrl >>= 1;
+            }
+
+            rdmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, non_global_ctrl);
+            global_ctrl = msr_content >> 32;
+            for ( i=0; i < 3; i++ )
+            {
+                core_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] =
+                    (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 1: 0);
+                non_global_ctrl >>= 4;
+                global_ctrl >>= 1;
+            }
+            break;
+        }
+        case MSR_CORE_PERF_FIXED_CTR_CTRL:
+        {
+            int i;
+            u32 fix_ctrl = msr_content;
+            u64 global_ctrl;
+            vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
+            global_ctrl >>= 32;
+            for ( i=0; i < 3; i++ )
+            {
+                core_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] =
+                    (global_ctrl & 1) & ((fix_ctrl & 0x3)? 1: 0);
+                fix_ctrl >>= 4;
+                global_ctrl >>=  1;
+            }
+            break;
+        }
+        default:
+        {
+            int tmp = ecx - MSR_PERFEVTSEL_BASE;
+            u64 global_ctrl;
+            vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
+            if ( tmp >= 0 && tmp < core_get_pmc_count() )
+                core_vpmu_cxt->pmu_enable->arch_pmc_enable[tmp] =
+                    ((global_ctrl >> tmp ) & 1) &
+                    ((msr_content & (1<<22)) >> 22);
+        }
+
+    }
+
+    if ( core_vpmu_cxt->pmu_enable->fixed_ctr_enable[0] == 1 ||
+         core_vpmu_cxt->pmu_enable->fixed_ctr_enable[1] == 1 ||
+         core_vpmu_cxt->pmu_enable->fixed_ctr_enable[2] == 1 )
+        vpmu->flags |= VPMU_RUNNING;
+    else
+    {
+        int i;
+        for (i=0; i < core_get_pmc_count(); i++)
+        {
+            if ( core_vpmu_cxt->pmu_enable->arch_pmc_enable[i] == 1 )
+            {
+                vpmu->flags |= VPMU_RUNNING;
+                break;
+            }
+        }
+        if ( i == core_get_pmc_count() )
+            vpmu->flags &= ~VPMU_RUNNING;
+    }
+
+    core_vpmu_save_msr_context(v, type, index, msr_content);
+    if ( type != MSR_TYPE_GLOBAL )
+        wrmsrl(ecx, msr_content);
+    else
+        vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+
+    return 1;
+}
+
+static int core_vpmu_do_rdmsr(struct cpu_user_regs *regs) {
+    u64 msr_content = 0;
+    int type = -1, index = -1;
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core_vpmu_context *core_vpmu_cxt = NULL;
+
+    if ( !core_vpmu_msr_common_check(regs->ecx, &type, &index) )
+        return 0;
+
+    core_vpmu_cxt = vpmu->context;
+    switch (regs->ecx) {
+    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+        break;
+    case MSR_CORE_PERF_GLOBAL_STATUS:
+        msr_content = core_vpmu_cxt->global_ovf_status;
+        break;
+    case MSR_CORE_PERF_GLOBAL_CTRL:
+        vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &msr_content);
+        break;
+    default:
+        rdmsrl(regs->ecx, msr_content);
+    }
+
+    regs->eax = msr_content & 0xFFFFFFFF;
+    regs->edx = msr_content >> 32;
+    return 1;
+}
+
+static int core_vpmu_do_interrupt(struct cpu_user_regs *regs) {
+    struct vcpu *v = current;
+    u64 msr_content;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core_vpmu_context *core_vpmu_cxt = vpmu->context;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+
+    rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
+    if ( !msr_content )
+        return 0;
+    core_vpmu_cxt->global_ovf_status |= msr_content;
+    wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, 0xC000000700000003);
+
+    apic_write_around(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+    if ( !is_lvtpc_enabled(v) )
+        return 1;
+
+    vlapic_set_reg(vlapic, APIC_LVTPC,
+               vlapic_get_reg(vlapic, APIC_LVTPC) | APIC_LVT_MASKED);
+    if (vpmu->int_vec)
+        vlapic_set_irq(vcpu_vlapic(v),vpmu->int_vec, 0);
+    else
+        test_and_set_bool(v->nmi_pending);
+    return 1;
+}
+
+static void core_vpmu_initialise(struct vcpu *v)
+{
+    /* Clear global control msr to disable all counters. */
+    wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+}
+
+static void core_vpmu_destroy(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core_vpmu_context *core_vpmu_cxt = vpmu->context;
+
+    if ( !vpmu->flags & VPMU_CONTEXT_ALLOCATED )
+        return;
+    xfree(core_vpmu_cxt->pmu_enable);
+    xfree(vpmu->context);
+}
+
+struct arch_vpmu_struct core_vpmu = {
+    .do_wrmsr = core_vpmu_do_wrmsr,
+    .do_rdmsr = core_vpmu_do_rdmsr,
+    .do_interrupt = core_vpmu_do_interrupt,
+    .arch_vpmu_initialise = core_vpmu_initialise,
+    .arch_vpmu_destroy = core_vpmu_destroy,
+    .arch_vpmu_save = core_vpmu_save,
+    .arch_vpmu_load = core_vpmu_load
+};
+
diff -r 4054cd60895b xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/arch/x86/i8259.c	Tue Dec 11 23:47:06 2007 +0800
@@ -72,6 +72,7 @@ BUILD_SMP_INTERRUPT(apic_timer_interrupt
 BUILD_SMP_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+BUILD_SMP_INTERRUPT(pmu_apic_interrupt,PMU_APIC_VECTOR)
 BUILD_SMP_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 
 #define IRQ(x,y) \
diff -r 4054cd60895b xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/include/asm-x86/hvm/hvm.h	Tue Dec 11 23:47:06 2007 +0800
@@ -124,6 +124,7 @@ struct hvm_function_table {
     void (*init_hypercall_page)(struct domain *d, void *hypercall_page);
 
     int  (*event_pending)(struct vcpu *v);
+    int  (*do_pmu_interrupt)(struct cpu_user_regs *regs);
 
     int  (*cpu_up)(void);
     void (*cpu_down)(void);
@@ -246,6 +247,11 @@ static inline int hvm_event_pending(stru
 static inline int hvm_event_pending(struct vcpu *v)
 {
     return hvm_funcs.event_pending(v);
+}
+
+static inline int hvm_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return hvm_funcs.do_pmu_interrupt(regs);
 }
 
 /* These reserved bits in lower 32 remain 0 after any load of CR0 */
diff -r 4054cd60895b xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/include/asm-x86/hvm/vlapic.h	Tue Dec 11 23:47:06 2007 +0800
@@ -94,5 +94,6 @@ int vlapic_match_logical_addr(struct vla
 
 int is_lvtt(struct vcpu *v, int vector);
 int is_lvtt_enabled(struct vcpu *v);
+int is_lvtpc_enabled(struct vcpu *v);
 
 #endif /* __ASM_X86_HVM_VLAPIC_H__ */
diff -r 4054cd60895b xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Tue Dec 11 23:47:06 2007 +0800
@@ -22,6 +22,7 @@
 #include <asm/config.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/vmx/cpu.h>
+#include <asm/hvm/vmx/vpmu.h>
 
 #ifdef VMXASSIST
 #include <public/hvm/vmx_assist.h>
@@ -75,6 +76,9 @@ struct arch_vmx_struct {
 
     /* Cache of cpu execution control. */
     u32                  exec_control;
+
+    /* PMU */
+    struct vpmu_struct   vpmu;
 
 #ifdef __x86_64__
     struct vmx_msr_state msr_state;
diff -r 4054cd60895b xen/include/asm-x86/hvm/vmx/vpmu.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vpmu.h	Tue Dec 11 23:47:06 2007 +0800
@@ -0,0 +1,79 @@
+/*
+ * pmu.h: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#ifndef __ASM_X86_HVM_VPMU_H_
+#define __ASM_X86_HVM_VPMU_H_
+
+#define msraddr_to_bitpos(x) (((x)&0xffff) + ((x)>>31)*0x2000)
+#define vcpu_vpmu(vcpu)   (&(vcpu)->arch.hvm_vcpu.u.vmx.vpmu)
+#define vpmu_vcpu(vpmu)   (container_of((vpmu), struct vcpu, \
+                                          arch.hvm_vcpu.u.vmx.vpmu))
+#define vpmu_domain(vpmu) (vpmu_vcpu(vpmu)->domain)
+
+#define MSR_TYPE_COUNTER            0
+#define MSR_TYPE_CTRL               1
+#define MSR_TYPE_GLOBAL             2
+#define MSR_TYPE_ARCH_COUNTER       3
+#define MSR_TYPE_ARCH_CTRL          4
+
+struct pmumsr {
+	unsigned int num;
+	u32 *msr;
+};
+
+struct msr_load_store_entry {
+	u32 msr_index;
+	u32 msr_reserved;
+	u64 msr_data;
+};
+
+/* Arch specific definitions shared by all vpmus */
+struct arch_vpmu_struct {
+	int (*do_wrmsr)(struct cpu_user_regs *regs);
+	int (*do_rdmsr)(struct cpu_user_regs *regs);
+    int (*do_interrupt)(struct cpu_user_regs *regs);
+    void (*arch_vpmu_initialise)(struct vcpu *v);
+    void (*arch_vpmu_destroy)(struct vcpu *v);
+    void (*arch_vpmu_save)(struct vcpu *v);
+    void (*arch_vpmu_load)(struct vcpu *v);
+};
+
+struct vpmu_struct {
+	int int_vec;
+    u32 flags;
+    void *context;
+	struct arch_vpmu_struct *arch_vpmu;
+};
+
+#define VPMU_CONTEXT_ALLOCATED              0x1
+#define VPMU_CONTEXT_LOADED                 0x2
+#define VPMU_RUNNING                        0x4
+
+int inline vpmu_do_wrmsr(struct cpu_user_regs *regs);
+int inline vpmu_do_rdmsr(struct cpu_user_regs *regs);
+int inline vpmu_do_interrupt(struct cpu_user_regs *regs);
+void inline vpmu_initialise(struct vcpu *v);
+void inline vpmu_destroy(struct vcpu *v);
+void inline vpmu_save(void *info);
+void inline vpmu_load(struct vcpu *v);
+
+#endif /* __ASM_X86_HVM_VPMU_H_*/
+
diff -r 4054cd60895b xen/include/asm-x86/hvm/vmx/vpmu_core.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vpmu_core.h	Tue Dec 11 23:47:06 2007 +0800
@@ -0,0 +1,75 @@
+
+/*
+ * vpmu_core.c: CORE Arch specific PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#ifndef __ASM_X86_HVM_VPMU_CORE_H_
+#define __ASM_X86_HVM_VPMU_CORE_H_
+
+/* Core 2 Non-architectual Performance Counter MSRs. */
+u32 core_counters_msr[] =   {
+    MSR_CORE_PERF_FIXED_CTR0,
+    MSR_CORE_PERF_FIXED_CTR1,
+    MSR_CORE_PERF_FIXED_CTR2};
+
+/* Core 2 Non-architectual Performance Control MSRs. */
+u32 core_ctrls_msr[] = {
+    MSR_CORE_PERF_FIXED_CTR_CTRL,
+    MSR_CORE_IA32_PEBS_ENABLE,
+    MSR_CORE_IA32_DS_AREA};
+
+/* Core 2 global control MSRs. */
+u32 core_global_msr[] = {
+    MSR_CORE_PERF_GLOBAL_CTRL,
+    MSR_CORE_PERF_GLOBAL_STATUS,
+    MSR_CORE_PERF_GLOBAL_OVF_CTRL};
+
+struct pmumsr core_counters = {
+    3,
+    core_counters_msr
+};
+
+struct pmumsr core_ctrls = {
+    3,
+    core_ctrls_msr
+};
+
+struct arch_msr_pair {
+    u64 counter;
+    u64 control;
+};
+
+struct core_pmu_enable {
+    char fixed_ctr_enable[3];
+    char arch_pmc_enable[1];
+};
+
+struct core_vpmu_context {
+    struct core_pmu_enable *pmu_enable;
+    u64 counters[3];
+    u64 ctrls[3];
+    u64 global_ctrl;
+    u64 global_ovf_status;
+    u32 hw_lapic_lvtpc;
+    struct arch_msr_pair arch_msr_pair[1];
+};
+
+#endif /* __ASM_X86_HVM_VPMU_CORE_H_ */
+
diff -r 4054cd60895b xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/include/asm-x86/irq.h	Tue Dec 11 23:47:06 2007 +0800
@@ -28,6 +28,7 @@ fastcall void call_function_interrupt(vo
 fastcall void call_function_interrupt(void);
 fastcall void apic_timer_interrupt(void);
 fastcall void error_interrupt(void);
+fastcall void pmu_apic_interrupt(void);
 fastcall void spurious_interrupt(void);
 fastcall void thermal_interrupt(void);
 
diff -r 4054cd60895b xen/include/asm-x86/mach-default/irq_vectors.h
--- a/xen/include/asm-x86/mach-default/irq_vectors.h	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/include/asm-x86/mach-default/irq_vectors.h	Tue Dec 11 23:47:06 2007 +0800
@@ -9,13 +9,14 @@
 #define CALL_FUNCTION_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR	0xfa
 #define LOCAL_TIMER_VECTOR	0xf9
+#define PMU_APIC_VECTOR		0xf8
 
 /*
  * High-priority dynamically-allocated vectors. For interrupts that
  * must be higher priority than any guest-bound interrupt.
  */
 #define FIRST_HIPRIORITY_VECTOR	0xf0
-#define LAST_HIPRIORITY_VECTOR  0xf8
+#define LAST_HIPRIORITY_VECTOR  0xf7
 
 /* Legacy PIC uses vectors 0xe0-0xef. */
 #define FIRST_LEGACY_VECTOR	0xe0
diff -r 4054cd60895b xen/include/asm-x86/msr-index.h
--- a/xen/include/asm-x86/msr-index.h	Mon Dec 10 13:49:22 2007 +0000
+++ b/xen/include/asm-x86/msr-index.h	Tue Dec 11 23:47:06 2007 +0800
@@ -388,6 +388,15 @@
 #define MSR_CORE_PERF_GLOBAL_CTRL	0x0000038f
 #define MSR_CORE_PERF_GLOBAL_OVF_CTRL	0x00000390
 
+// Performance MSRs for Conroe
+#define MSR_IA32_PMC_BASE                     0x0c1
+#define MSR_PERFEVTSEL_BASE                   0x186
+#define MSR_CORE_LASTBRANCH_TOS               0x1c9
+#define MSR_CORE_IA32_PEBS_ENABLE             0x3f1
+#define       MSR_CORE_IA32_PEBS_ENABLE_EN        1
+#define MSR_CORE_IA32_DS_AREA                 0x600
+#define MSR_CORE_IA32_PERF_CAPALITIES         0x345
+
 /* Geode defined MSRs */
 #define MSR_GEODE_BUSCONT_CONF0		0x00001900
 

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Enable Core 2 Duo Performance Counters in HVM guest
  2007-12-11 10:06 [PATCH] Enable Core 2 Duo Performance Counters in HVM guest Shan, Haitao
@ 2007-12-11 10:37 ` Keir Fraser
  2007-12-11 13:32   ` Shan, Haitao
  0 siblings, 1 reply; 14+ messages in thread
From: Keir Fraser @ 2007-12-11 10:37 UTC (permalink / raw)
  To: Shan, Haitao; +Cc: xen-devel, Jiang, Yunhong

[-- Attachment #1.1: Type: text/plain, Size: 1976 bytes --]

 * Don¹t define new MSR macros  use the existing ones (or fixup the
eixsting ones to have better names if you really think that is necessary,
although I doubt it is in this case).

 * Some of the indentation is rather wacky.

 * Poking the LVTPC as a one-off event only when the VCPU writes its virtual
LVTPC is not going to fly. What if the VCPU is subsequently migrated to a
different physical CPU? In any case the LVTPC is a shared resource that
needs synchronised access via e.g., context switching or mutual exclusion
(see the next point below).

 * How does this interact with, for example, xenoprof? Ideally we should be
able to run xenoprof and this concurrently over disjoint subsets of domains.
At the very least xenoprof and this patch should avoid stomping on each
other by implementing some form of mutual exclusion. Probably some resource
sharing (e.g., the PMC interrupt vector) also is possible. I expect this
will require some design thought, but unfortunately that is the price for
being second into the tree.

 * Impact on save/restore to/from Core-2 processors: has this been tested at
all?

 -- Keir

On 11/12/07 10:06, "Shan, Haitao" <haitao.shan@intel.com> wrote:

> Hi, Keir, 
> 
> Currently, HVM guests do not have access to performance counters. So it is not
> possible to use performance analyzer software such as vtune in HVM guest to
> analyze programme performance. Other usage of performance counters , for
> example, the NMI watchdog, won't function either.
> 
> This patch will enable performance counters in HVM guest. Currently, only Core
> 2 Duo is implemented. Tests are carried out using Intel Vtune Performance
> Analyzer in Windows XP Professional with 2 vcpus.
> 
> There are some notes at the beginning of the patch, describing implementation
> choices in this patch.
> 
> Signed-off-by:    Haitao Shan <haitao.shan@intel.com>
>  <<pmu.patch>> 
> 
> Best Regards 
> Haitao Shan 
> 

[-- Attachment #1.2: Type: text/html, Size: 3025 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] Enable Core 2 Duo Performance Counters in HVM guest
  2007-12-11 10:37 ` Keir Fraser
@ 2007-12-11 13:32   ` Shan, Haitao
  2007-12-11 15:01     ` Keir Fraser
  0 siblings, 1 reply; 14+ messages in thread
From: Shan, Haitao @ 2007-12-11 13:32 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, Jiang, Yunhong

[-- Attachment #1.1: Type: text/plain, Size: 2875 bytes --]

Hi, Keir

Please see my comments embedded.

________________________________

From: Keir Fraser [mailto:Keir.Fraser@cl.cam.ac.uk] 
Sent: 2007年12月11日 18:38
To: Shan, Haitao
Cc: xen-devel@lists.xensource.com; Jiang, Yunhong
Subject: Re: [PATCH] Enable Core 2 Duo Performance Counters in HVM guest

* Don’t define new MSR macros - use the existing ones (or fixup the eixsting ones to have better names if you really think that is necessary, although I doubt it is in this case).

I will update.

 * Some of the indentation is rather wacky.

I will update.

 * Poking the LVTPC as a one-off event only when the VCPU writes its virtual LVTPC is not going to fly. What if the VCPU is subsequently migrated to a different physical CPU? In any case the LVTPC is a shared resource that needs synchronised access via e.g., context switching or mutual exclusion (see the next point below).

I think in the patch I will save/load LVTPC during context switch. 

 * How does this interact with, for example, xenoprof? Ideally we should be able to run xenoprof and this concurrently over disjoint subsets of domains. At the very least xenoprof and this patch should avoid stomping on each other by implementing some form of mutual exclusion. Probably some resource sharing (e.g., the PMC interrupt vector) also is possible. I expect this will require some design thought, but unfortunately that is the price for being second into the tree.

Yes. How to share PMU resources will need careful design. But I really don’t think it possible to run both concurrently unless we do some sort of PMC’s partition. Of course, it is silly of me not to implement a mechanism for mutual exclusion. I will implement one. 

 * Impact on save/restore to/from Core-2 processors: has this been tested at all?

I will try doing the test. I think whether it should support save/restore can be argued. Does anyone want to run VTune/Oprofile during save/restore? This can be hardly a good usage model. But at least, I update it to ensure it does not break current save/restore.

 -- Keir

On 11/12/07 10:06, "Shan, Haitao" <haitao.shan@intel.com> wrote:

Hi, Keir, 

Currently, HVM guests do not have access to performance counters. So it is not possible to use performance analyzer software such as vtune in HVM guest to analyze programme performance. Other usage of performance counters , for example, the NMI watchdog, won't function either.

This patch will enable performance counters in HVM guest. Currently, only Core 2 Duo is implemented. Tests are carried out using Intel Vtune Performance Analyzer in Windows XP Professional with 2 vcpus.

There are some notes at the beginning of the patch, describing implementation choices in this patch. 

Signed-off-by:    Haitao Shan <haitao.shan@intel.com> 
 <<pmu.patch>> 

Best Regards 
Haitao Shan 

[-- Attachment #1.2: Type: text/html, Size: 10573 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Enable Core 2 Duo Performance Counters in HVM guest
  2007-12-11 13:32   ` Shan, Haitao
@ 2007-12-11 15:01     ` Keir Fraser
  2007-12-14  7:49       ` Re: [PATCH] Enable Core 2 Duo Performance Counters inHVM guest Shan, Haitao
  0 siblings, 1 reply; 14+ messages in thread
From: Keir Fraser @ 2007-12-11 15:01 UTC (permalink / raw)
  To: Shan, Haitao; +Cc: xen-devel, Jiang, Yunhong

[-- Attachment #1.1: Type: text/plain, Size: 2608 bytes --]

Oh yes, I see you save/restore MSR state and LVTPC across context switch.
That¹s fine then. But I don¹t think you should pass through any of the
virtual LVTPC register fields at all, except to toggle the mask field of the
real LVTPC depending on whether or not performance counter interrupt
delivery is currently enabled for the VCPU (I¹d implement this just to avoid
unnecessary real interrupts which are certainly not going to turn into
virtual interrupts). There are no real LVTPC fields which should really be
under guest control, and so cooking the guest LVTPC value and poking it into
the real LVTPC in the vlapic code looks odd. As does save/restore of the
whole LVTPC on context switch -- at most you should need to track
masked/not-masked. And you¹re letting the guest have access to reserved bits
of the real LVTPC, which is not good...

Ignore my comment about save/restore ‹ I misread your context-switching code
as HVM save/restore code!

 -- Keir

On 11/12/07 13:32, "Shan, Haitao" <haitao.shan@intel.com> wrote:

>  * Poking the LVTPC as a one-off event only when the VCPU writes its virtual
> LVTPC is not going to fly. What if the VCPU is subsequently migrated to a
> different physical CPU? In any case the LVTPC is a shared resource that needs
> synchronised access via e.g., context switching or mutual exclusion (see the
> next point below).
> I think in the patch I will save/load LVTPC during context switch.
> 
>  * How does this interact with, for example, xenoprof? Ideally we should be
> able to run xenoprof and this concurrently over disjoint subsets of domains.
> At the very least xenoprof and this patch should avoid stomping on each other
> by implementing some form of mutual exclusion. Probably some resource sharing
> (e.g., the PMC interrupt vector) also is possible. I expect this will require
> some design thought, but unfortunately that is the price for being second into
> the tree.
> Yes. How to share PMU resources will need careful design. But I really don¹t
> think it possible to run both concurrently unless we do some sort of PMC¹s
> partition. Of course, it is silly of me not to implement a mechanism for
> mutual exclusion. I will implement one.
> 
>  * Impact on save/restore to/from Core-2 processors: has this been tested at
> all?
> I will try doing the test. I think whether it should support save/restore can
> be argued. Does anyone want to run VTune/Oprofile during save/restore? This
> can be hardly a good usage model. But at least, I update it to ensure it does
> not break current save/restore.

[-- Attachment #1.2: Type: text/html, Size: 3198 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: Re: [PATCH] Enable Core 2 Duo Performance Counters inHVM guest
  2007-12-11 15:01     ` Keir Fraser
@ 2007-12-14  7:49       ` Shan, Haitao
  2007-12-14  8:31         ` Keir Fraser
  0 siblings, 1 reply; 14+ messages in thread
From: Shan, Haitao @ 2007-12-14  7:49 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, Jiang, Yunhong


[-- Attachment #1.1: Type: text/plain, Size: 3640 bytes --]

Hi, Keir,
 
Thanks for your detailed comments. I have worked out an updated patch.
 
I removed my own MSR macros definitions and hard TAB indentation. Also, I removed LVTPC write-through in vlapic_write. Now, only when guest both enables counting and interrupt, physical LVTPC is written. And when vcpu is scheduled out, LVTPC is masked.
In addition, I employed a "first come, first service" policy to grant PMU access to xenoprof/hvm_guest. When access is granted to hvm guest, oprofile will get "Device Busy" . On the contrary, guest can not use PMU like before.
HVM save/restore is tested, the patch will not break current code.
 
Can you have a look and give me your comments? Thanks in advance!

Best Regards 
Haitao Shan 

 

________________________________

From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Keir Fraser
Sent: 2007年12月11日 23:02
To: Shan, Haitao
Cc: xen-devel@lists.xensource.com; Jiang, Yunhong
Subject: [Xen-devel] Re: [PATCH] Enable Core 2 Duo Performance Counters inHVM guest


Oh yes, I see you save/restore MSR state and LVTPC across context switch. That’s fine then. But I don’t think you should pass through any of the virtual LVTPC register fields at all, except to toggle the mask field of the real LVTPC depending on whether or not performance counter interrupt delivery is currently enabled for the VCPU (I’d implement this just to avoid unnecessary real interrupts which are certainly not going to turn into virtual interrupts). There are no real LVTPC fields which should really be under guest control, and so cooking the guest LVTPC value and poking it into the real LVTPC in the vlapic code looks odd. As does save/restore of the whole LVTPC on context switch -- at most you should need to track masked/not-masked. And you’re letting the guest have access to reserved bits of the real LVTPC, which is not good...

Ignore my comment about save/restore ― I misread your context-switching code as HVM save/restore code!

 -- Keir

On 11/12/07 13:32, "Shan, Haitao" <haitao.shan@intel.com> wrote:



	* Poking the LVTPC as a one-off event only when the VCPU writes its virtual LVTPC is not going to fly. What if the VCPU is subsequently migrated to a different physical CPU? In any case the LVTPC is a shared resource that needs synchronised access via e.g., context switching or mutual exclusion (see the next point below).
	I think in the patch I will save/load LVTPC during context switch. 
	
	 * How does this interact with, for example, xenoprof? Ideally we should be able to run xenoprof and this concurrently over disjoint subsets of domains. At the very least xenoprof and this patch should avoid stomping on each other by implementing some form of mutual exclusion. Probably some resource sharing (e.g., the PMC interrupt vector) also is possible. I expect this will require some design thought, but unfortunately that is the price for being second into the tree.
	Yes. How to share PMU resources will need careful design. But I really don’t think it possible to run both concurrently unless we do some sort of PMC’s partition. Of course, it is silly of me not to implement a mechanism for mutual exclusion. I will implement one. 
	
	 * Impact on save/restore to/from Core-2 processors: has this been tested at all?
	I will try doing the test. I think whether it should support save/restore can be argued. Does anyone want to run VTune/Oprofile during save/restore? This can be hardly a good usage model. But at least, I update it to ensure it does not break current save/restore.
	




[-- Attachment #1.2: Type: text/html, Size: 5935 bytes --]

[-- Attachment #2: pmu.patch --]
[-- Type: application/octet-stream, Size: 38642 bytes --]

diff -r 8f0cbfc478d6 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/apic.c	Thu Dec 13 23:47:33 2007 +0800
@@ -93,6 +93,9 @@ void __init apic_intr_init(void)
     /* IPI vectors for APIC spurious and error interrupts */
     set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
     set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+    /* Performance Counters Interrupt */
+    set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt);
 
     /* thermal monitor LVT interrupt */
 #ifdef CONFIG_X86_MCE_P4THERMAL
@@ -1227,6 +1230,16 @@ fastcall void smp_error_interrupt(struct
 }
 
 /*
+ * This interrupt handles performance counters interrupt
+ */
+
+fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs)
+{
+    ack_APIC_irq();
+    hvm_do_pmu_interrupt(regs);
+}
+
+/*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
diff -r 8f0cbfc478d6 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c	Thu Dec 13 23:47:33 2007 +0800
@@ -867,6 +867,11 @@ static int svm_event_pending(struct vcpu
     return vmcb->eventinj.fields.v;
 }
 
+static int svm_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return 0;
+}
+
 static struct hvm_function_table svm_function_table = {
     .name                 = "SVM",
     .cpu_down             = svm_cpu_down,
@@ -890,7 +895,8 @@ static struct hvm_function_table svm_fun
     .set_tsc_offset       = svm_set_tsc_offset,
     .inject_exception     = svm_inject_exception,
     .init_hypercall_page  = svm_init_hypercall_page,
-    .event_pending        = svm_event_pending
+    .event_pending        = svm_event_pending,
+    .do_pmu_interrupt     = svm_do_pmu_interrupt
 };
 
 int start_svm(struct cpuinfo_x86 *c)
diff -r 8f0cbfc478d6 xen/arch/x86/hvm/vmx/Makefile
--- a/xen/arch/x86/hvm/vmx/Makefile	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/Makefile	Thu Dec 13 23:47:33 2007 +0800
@@ -9,3 +9,5 @@ endif
 endif
 obj-y += vmcs.o
 obj-y += vmx.o
+obj-y += vpmu.o
+obj-y += vpmu_core.o
diff -r 8f0cbfc478d6 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Dec 13 23:47:33 2007 +0800
@@ -90,6 +90,8 @@ static int vmx_vcpu_initialise(struct vc
         return rc;
     }
 
+    vpmu_initialise(v);
+
     vmx_install_vlapic_mapping(v);
 
 #ifndef VMXASSIST
@@ -104,6 +106,7 @@ static void vmx_vcpu_destroy(struct vcpu
 static void vmx_vcpu_destroy(struct vcpu *v)
 {
     vmx_destroy_vmcs(v);
+    vpmu_destroy(v);
 }
 
 #ifdef __x86_64__
@@ -742,6 +745,7 @@ static void vmx_ctxt_switch_from(struct 
     vmx_save_guest_msrs(v);
     vmx_restore_host_msrs();
     vmx_save_dr(v);
+    vpmu_save(v);
 }
 
 static void vmx_ctxt_switch_to(struct vcpu *v)
@@ -752,6 +756,7 @@ static void vmx_ctxt_switch_to(struct vc
 
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
+    vpmu_load(v);
 }
 
 static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg)
@@ -1129,6 +1134,11 @@ static int vmx_event_pending(struct vcpu
 {
     ASSERT(v == current);
     return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
+}
+
+static int vmx_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return vpmu_do_interrupt(regs);
 }
 
 static struct hvm_function_table vmx_function_table = {
@@ -1154,6 +1164,7 @@ static struct hvm_function_table vmx_fun
     .inject_exception     = vmx_inject_exception,
     .init_hypercall_page  = vmx_init_hypercall_page,
     .event_pending        = vmx_event_pending,
+    .do_pmu_interrupt     = vmx_do_pmu_interrupt,
     .cpu_up               = vmx_cpu_up,
     .cpu_down             = vmx_cpu_down,
 };
@@ -1312,7 +1323,6 @@ void vmx_cpuid_intercept(
 
     case 0x00000006:
     case 0x00000009:
-    case 0x0000000A:
         *eax = *ebx = *ecx = *edx = 0;
         break;
 
@@ -2395,7 +2405,15 @@ static int vmx_do_msr_read(struct cpu_us
         /* No point in letting the guest see real MCEs */
         msr_content = 0;
         break;
+    case MSR_IA32_MISC_ENABLE:
+        rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
+        /* Debug Trace Store is not supported. */
+        msr_content |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
+                       MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
+        break;
     default:
+        if ( vpmu_do_rdmsr(regs) )
+            goto done;
         switch ( long_mode_do_msr_read(regs) )
         {
             case HNDL_unhandled:
@@ -2602,6 +2620,8 @@ static int vmx_do_msr_write(struct cpu_u
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
+        if ( vpmu_do_wrmsr(regs) )
+            return 1;
         switch ( long_mode_do_msr_write(regs) )
         {
             case HNDL_unhandled:
@@ -2651,6 +2671,7 @@ static void vmx_do_extint(struct cpu_use
     fastcall void smp_call_function_interrupt(void);
     fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
     fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
+    fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs);
 #ifdef CONFIG_X86_MCE_P4THERMAL
     fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
 #endif
@@ -2680,6 +2701,9 @@ static void vmx_do_extint(struct cpu_use
         break;
     case ERROR_APIC_VECTOR:
         smp_error_interrupt(regs);
+        break;
+    case PMU_APIC_VECTOR:
+        smp_pmu_apic_interrupt(regs);
         break;
 #ifdef CONFIG_X86_MCE_P4THERMAL
     case THERMAL_APIC_VECTOR:
diff -r 8f0cbfc478d6 xen/arch/x86/hvm/vmx/vpmu.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vpmu.c	Thu Dec 13 23:47:33 2007 +0800
@@ -0,0 +1,108 @@
+/*
+ * vpmu.c: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/percpu.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <public/sched.h>
+#include <public/hvm/save.h>
+#include <asm/hvm/vmx/vpmu.h>
+
+int inline vpmu_do_wrmsr(struct cpu_user_regs *regs) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu )
+        return vpmu->arch_vpmu->do_wrmsr(regs);
+    return 0;
+}
+
+int inline vpmu_do_rdmsr(struct cpu_user_regs *regs) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu )
+        return vpmu->arch_vpmu->do_rdmsr(regs);
+    return 0;
+}
+
+int inline vpmu_do_interrupt(struct cpu_user_regs *regs) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu )
+        return vpmu->arch_vpmu->do_interrupt(regs);
+    return 0;
+}
+
+void vpmu_save(void *info) {
+    struct vcpu *v = info;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu )
+        vpmu->arch_vpmu->arch_vpmu_save(v);
+}
+
+void vpmu_load(struct vcpu *v) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu )
+        vpmu->arch_vpmu->arch_vpmu_load(v);
+}
+
+extern struct arch_vpmu_struct core_vpmu;
+void inline vpmu_initialise(struct vcpu *v) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    __u8 cpu_model = current_cpu_data.x86_model;
+
+    /* If it is not a fresh initialization, release all resources
+     * before initialise again.
+     */
+    if ( vpmu->flags & VPMU_CONTEXT_ALLOCATED )
+        vpmu_destroy(v);
+
+    switch (cpu_model)
+    {
+    case 15:
+    case 23:
+        vpmu->arch_vpmu = &core_vpmu;
+        dprintk(XENLOG_INFO, "Core 2 duo CPU detected for guest PMU usage.\n");
+        break;
+    default:
+        dprintk(XENLOG_WARNING, "Unsupport CPU model for guest PMU usage.\n");
+        return;
+    }
+
+    vpmu->flags = 0;
+    vpmu->context = NULL;
+    vpmu->arch_vpmu->arch_vpmu_initialise(v);
+}
+
+void inline vpmu_destroy(struct vcpu *v) {
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu )
+        vpmu->arch_vpmu->arch_vpmu_destroy(v);
+}
+
diff -r 8f0cbfc478d6 xen/arch/x86/hvm/vmx/vpmu_core.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vpmu_core.c	Fri Dec 14 00:09:24 2007 +0800
@@ -0,0 +1,484 @@
+/*
+ * vpmu_core.c: CORE Arch specific PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/system.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/msr-index.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vlapic.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <public/sched.h>
+#include <public/hvm/save.h>
+#include <asm/hvm/vmx/vpmu.h>
+#include <asm/hvm/vmx/vpmu_core.h>
+
+static int arch_pmc_cnt = 0;
+
+static int core_get_pmc_count(void)
+{
+    u32 eax, ebx, ecx, edx;
+
+    if ( arch_pmc_cnt )
+        return arch_pmc_cnt;
+
+    cpuid(0xa, &eax, &ebx, &ecx, &edx);
+    return arch_pmc_cnt = (eax & 0xff00) >> 8;
+}
+
+static int is_core_vpmu_msr(u32 msr_index, int *type, int *index)
+{
+    int i;
+
+    for (i=0; i < core_counters.num; i++)
+        if (core_counters.msr[i] == msr_index)
+        {
+            *type = MSR_TYPE_COUNTER;
+            *index = i;
+            return 1;
+        }
+    for (i=0; i < core_ctrls.num; i++)
+        if (core_ctrls.msr[i] == msr_index)
+        {
+            *type = MSR_TYPE_CTRL;
+            *index = i;
+            return 1;
+        }
+
+    if ( msr_index == MSR_CORE_PERF_GLOBAL_CTRL ||
+         msr_index == MSR_CORE_PERF_GLOBAL_STATUS ||
+         msr_index == MSR_CORE_PERF_GLOBAL_OVF_CTRL )
+    {
+        *type = MSR_TYPE_GLOBAL;
+        return 1;
+    }
+
+    if ( msr_index >= MSR_IA32_PERFCTR0 &&
+         msr_index < MSR_IA32_PERFCTR0 + core_get_pmc_count() )
+    {
+        *type = MSR_TYPE_ARCH_COUNTER;
+        *index = msr_index - MSR_IA32_PERFCTR0;
+        return 1;
+    }
+    if ( msr_index >= MSR_P6_EVNTSEL0 &&
+         msr_index < MSR_P6_EVNTSEL0 + core_get_pmc_count() )
+    {
+        *type = MSR_TYPE_ARCH_CTRL;
+        *index = msr_index - MSR_P6_EVNTSEL0;
+        return 1;
+    }
+    return 0;
+}
+
+static void core_vpmu_set_msr_bitmap(char *msr_bitmap) {
+    int i;
+
+    /* Allow Read/Write PMU Counters MSR Directly. */
+    for (i=0; i < core_counters.num; i++)
+    {
+        clear_bit(msraddr_to_bitpos(core_counters.msr[i]), msr_bitmap);
+        clear_bit(msraddr_to_bitpos(core_counters.msr[i]), msr_bitmap + 0x800);
+    }
+    for (i=0; i < core_get_pmc_count(); i++)
+    {
+        clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap);
+        clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap + 0x800);
+    }
+    /* Allow Read PMU Non-global controls Directly. */
+    for (i=0; i < core_ctrls.num; i++)
+        clear_bit(msraddr_to_bitpos(core_ctrls.msr[i]), msr_bitmap);
+    for (i=0; i < core_get_pmc_count(); i++)
+        clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0+i), msr_bitmap);
+}
+
+static void core_vpmu_unset_msr_bitmap(char *msr_bitmap) {
+    int i;
+
+    for (i=0; i < core_counters.num; i++)
+    {
+        set_bit(msraddr_to_bitpos(core_counters.msr[i]), msr_bitmap);
+        set_bit(msraddr_to_bitpos(core_counters.msr[i]), msr_bitmap + 0x800);
+    }
+    for (i=0; i < core_get_pmc_count(); i++)
+    {
+        set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap);
+        set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap + 0x800);
+    }
+    /* Allow Read PMU Non-global controls Directly. */
+    for (i=0; i < core_ctrls.num; i++)
+        set_bit(msraddr_to_bitpos(core_ctrls.msr[i]), msr_bitmap);
+    for (i=0; i < core_get_pmc_count(); i++)
+        set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0+i), msr_bitmap);
+}
+
+static inline void __core_vpmu_save(struct vcpu *v)
+{
+    int i;
+    struct core_vpmu_context *core_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    for (i=0; i < core_counters.num; i++)
+        rdmsrl(core_counters.msr[i], core_vpmu_cxt->counters[i]);
+    for (i=0; i < core_get_pmc_count(); i++)
+        rdmsrl(MSR_IA32_PERFCTR0+i, core_vpmu_cxt->arch_msr_pair[i].counter);
+    core_vpmu_cxt->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
+    apic_write(APIC_LVTPC, LVTPC_HVM_PMU | APIC_LVT_MASKED);
+}
+
+static void core_vpmu_save(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( !((vpmu->flags & VPMU_CONTEXT_ALLOCATED) &&
+           (vpmu->flags & VPMU_CONTEXT_LOADED)) )
+        return;
+
+    __core_vpmu_save(v);
+
+    /* Unset PMU MSR bitmap to trap lazy load. */
+    if ( !(vpmu->flags & VPMU_RUNNING) && cpu_has_vmx_msr_bitmap )
+        core_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+
+    vpmu->flags &= ~VPMU_CONTEXT_LOADED;
+    return;
+}
+
+static inline void __core_vpmu_load(struct vcpu *v)
+{
+    int i;
+    struct core_vpmu_context *core_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    for (i=0; i < core_counters.num; i++)
+        wrmsrl(core_counters.msr[i], core_vpmu_cxt->counters[i]);
+    for (i=0; i < core_get_pmc_count(); i++)
+        wrmsrl(MSR_IA32_PERFCTR0+i, core_vpmu_cxt->arch_msr_pair[i].counter);
+
+    for (i=0; i < core_ctrls.num; i++)
+        wrmsrl(core_ctrls.msr[i], core_vpmu_cxt->ctrls[i]);
+    for (i=0; i < core_get_pmc_count(); i++)
+        wrmsrl(MSR_P6_EVNTSEL0+i, core_vpmu_cxt->arch_msr_pair[i].control);
+
+    apic_write_around(APIC_LVTPC, core_vpmu_cxt->hw_lapic_lvtpc);
+}
+
+static void core_vpmu_load(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    /* Only when PMU is counting, we load PMU context immediately. */
+    if ( !((vpmu->flags & VPMU_CONTEXT_ALLOCATED) &&
+           (vpmu->flags & VPMU_RUNNING)) )
+        return;
+    __core_vpmu_load(v);
+    vpmu->flags |= VPMU_CONTEXT_LOADED;
+}
+
+static int core_vpmu_alloc_resource(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core_vpmu_context *core_vpmu_cxt;
+    struct core_pmu_enable *pmu_enable;
+
+    if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
+        return 0;
+
+    wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+    if ( vmx_add_host_load_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
+        return 0;
+
+    if ( vmx_add_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
+        return 0;
+    vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, -1ULL);
+
+    pmu_enable = xmalloc_bytes(sizeof(struct core_pmu_enable) +
+                 (core_get_pmc_count()-1)*sizeof(char));
+    if (!pmu_enable)
+        goto out1;
+    memset(pmu_enable, 0, sizeof(struct core_pmu_enable) +
+                 (core_get_pmc_count()-1)*sizeof(char));
+
+    core_vpmu_cxt = xmalloc_bytes(sizeof(struct core_vpmu_context) +
+                    (core_get_pmc_count()-1)*sizeof(struct arch_msr_pair));
+    if (!core_vpmu_cxt)
+        goto out2;
+    memset(core_vpmu_cxt, 0, sizeof(struct core_vpmu_context) +
+                    (core_get_pmc_count()-1)*sizeof(struct arch_msr_pair));
+    core_vpmu_cxt->pmu_enable = pmu_enable;
+    vpmu->context = (void *)core_vpmu_cxt;
+
+    return 1;
+ out2:
+    xfree(pmu_enable);
+ out1:
+    dprintk(XENLOG_WARNING, "Insufficient memory for PMU, PMU feature is \
+            unavailable on domain %d vcpu %d.\n",
+            v->vcpu_id, v->domain->domain_id);
+    return 0;
+}
+
+static void core_vpmu_save_msr_context(struct vcpu *v, int type,
+                                       int index, u64 msr_data)
+{
+    struct core_vpmu_context *core_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    switch (type) {
+    case MSR_TYPE_CTRL:
+        core_vpmu_cxt->ctrls[index] = msr_data;
+        break;
+    case MSR_TYPE_ARCH_CTRL:
+        core_vpmu_cxt->arch_msr_pair[index].control = msr_data;
+        break;
+    }
+}
+
+static int core_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( !is_core_vpmu_msr(msr_index, type, index) )
+        return 0;
+
+    if ( unlikely(!(vpmu->flags & VPMU_CONTEXT_ALLOCATED)) &&
+         !core_vpmu_alloc_resource(current) )
+        return 0;
+    vpmu->flags |= VPMU_CONTEXT_ALLOCATED;
+
+    /* Do the lazy load staff. */
+    if ( !(vpmu->flags & VPMU_CONTEXT_LOADED) )
+    {
+        __core_vpmu_load(current);
+        vpmu->flags |= VPMU_CONTEXT_LOADED;
+        if ( cpu_has_vmx_msr_bitmap )
+            core_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap);
+    }
+    return 1;
+}
+
+static int core_vpmu_do_wrmsr(struct cpu_user_regs *regs) {
+    u32 ecx = regs->ecx;
+    u64 msr_content;
+    int type = -1, index = -1;
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core_vpmu_context *core_vpmu_cxt = NULL;
+
+    if ( !core_vpmu_msr_common_check(ecx, &type, &index) )
+        return 0;
+
+    msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
+    core_vpmu_cxt = vpmu->context;
+    switch (ecx) {
+        /* PMU Control MSRs */
+        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+            core_vpmu_cxt->global_ovf_status &= ~msr_content;
+            return 1;
+        case MSR_CORE_PERF_GLOBAL_STATUS:
+            dprintk(XENLOG_INFO, "Can not write readonly MSR: \
+                                MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
+            vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
+            return 1;
+        case MSR_IA32_PEBS_ENABLE:
+            if (msr_content & 1)
+                dprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, \
+                        which is not supported.\n");
+            return 1;
+        case MSR_IA32_DS_AREA:
+            dprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
+            return 1;
+        case MSR_CORE_PERF_GLOBAL_CTRL:
+        {
+            int i;
+            u64 non_global_ctrl;
+            u32 global_ctrl = msr_content;
+
+            for ( i=0; i<core_get_pmc_count(); i++)
+            {
+                rdmsrl(MSR_P6_EVNTSEL0+i, non_global_ctrl);
+                core_vpmu_cxt->pmu_enable->arch_pmc_enable[i] =
+                        (global_ctrl & 1) &
+                        ((non_global_ctrl + (1<<22))>>22);
+                global_ctrl >>= 1;
+            }
+
+            rdmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, non_global_ctrl);
+            global_ctrl = msr_content >> 32;
+            for ( i=0; i < 3; i++ )
+            {
+                core_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] =
+                    (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 1: 0);
+                non_global_ctrl >>= 4;
+                global_ctrl >>= 1;
+            }
+            break;
+        }
+        case MSR_CORE_PERF_FIXED_CTR_CTRL:
+        {
+            int i;
+            u32 fix_ctrl = msr_content;
+            u64 global_ctrl;
+            vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
+            global_ctrl >>= 32;
+            for ( i=0; i < 3; i++ )
+            {
+                core_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] =
+                    (global_ctrl & 1) & ((fix_ctrl & 0x3)? 1: 0);
+                fix_ctrl >>= 4;
+                global_ctrl >>=  1;
+            }
+            break;
+        }
+        default:
+        {
+            int tmp = ecx - MSR_P6_EVNTSEL0;
+            u64 global_ctrl;
+            vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
+            if ( tmp >= 0 && tmp < core_get_pmc_count() )
+                core_vpmu_cxt->pmu_enable->arch_pmc_enable[tmp] =
+                    ((global_ctrl >> tmp ) & 1) &
+                    ((msr_content & (1<<22)) >> 22);
+        }
+
+    }
+
+    if ( core_vpmu_cxt->pmu_enable->fixed_ctr_enable[0] == 1 ||
+         core_vpmu_cxt->pmu_enable->fixed_ctr_enable[1] == 1 ||
+         core_vpmu_cxt->pmu_enable->fixed_ctr_enable[2] == 1 )
+        vpmu->flags |= VPMU_RUNNING;
+    else
+    {
+        int i;
+        for (i=0; i < core_get_pmc_count(); i++)
+        {
+            if ( core_vpmu_cxt->pmu_enable->arch_pmc_enable[i] == 1 )
+            {
+                vpmu->flags |= VPMU_RUNNING;
+                break;
+            }
+        }
+        if ( i == core_get_pmc_count() )
+            vpmu->flags &= ~VPMU_RUNNING;
+    }
+    /* Setup LVTPC in local apic */
+    if ( vpmu->flags & VPMU_RUNNING &&
+         is_vlapic_lvtpc_enabled(vcpu_vlapic(v)) )
+        apic_write_around(APIC_LVTPC, LVTPC_HVM_PMU);
+    else
+        apic_write_around(APIC_LVTPC, LVTPC_HVM_PMU | APIC_LVT_MASKED);
+
+    core_vpmu_save_msr_context(v, type, index, msr_content);
+    if ( type != MSR_TYPE_GLOBAL )
+        wrmsrl(ecx, msr_content);
+    else
+        vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+
+    return 1;
+}
+
+static int core_vpmu_do_rdmsr(struct cpu_user_regs *regs) {
+    u64 msr_content = 0;
+    int type = -1, index = -1;
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core_vpmu_context *core_vpmu_cxt = NULL;
+
+    if ( !core_vpmu_msr_common_check(regs->ecx, &type, &index) )
+        return 0;
+
+    core_vpmu_cxt = vpmu->context;
+    switch (regs->ecx) {
+    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+        break;
+    case MSR_CORE_PERF_GLOBAL_STATUS:
+        msr_content = core_vpmu_cxt->global_ovf_status;
+        break;
+    case MSR_CORE_PERF_GLOBAL_CTRL:
+        vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &msr_content);
+        break;
+    default:
+        rdmsrl(regs->ecx, msr_content);
+    }
+
+    regs->eax = msr_content & 0xFFFFFFFF;
+    regs->edx = msr_content >> 32;
+    return 1;
+}
+
+static int core_vpmu_do_interrupt(struct cpu_user_regs *regs) {
+    struct vcpu *v = current;
+    u64 msr_content;
+    u32 vlapic_lvtpc;
+    unsigned char int_vec;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core_vpmu_context *core_vpmu_cxt = vpmu->context;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+
+    rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
+    if ( !msr_content )
+        return 0;
+    core_vpmu_cxt->global_ovf_status |= msr_content;
+    wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, 0xC000000700000003);
+
+    apic_write_around(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+    if ( !is_vlapic_lvtpc_enabled(vcpu_vlapic(v)) )
+        return 1;
+
+    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+    int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
+    vlapic_set_reg(vlapic, APIC_LVTPC, vlapic_lvtpc | APIC_LVT_MASKED);
+    if (GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED)
+        vlapic_set_irq(vcpu_vlapic(v), int_vec, 0);
+    else
+        test_and_set_bool(v->nmi_pending);
+    return 1;
+}
+
+static void core_vpmu_initialise(struct vcpu *v)
+{
+}
+
+static void core_vpmu_destroy(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core_vpmu_context *core_vpmu_cxt = vpmu->context;
+
+    if ( !vpmu->flags & VPMU_CONTEXT_ALLOCATED )
+        return;
+    xfree(core_vpmu_cxt->pmu_enable);
+    xfree(vpmu->context);
+    if ( cpu_has_vmx_msr_bitmap )
+        core_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+    release_pmu_ownship(PMU_OWNER_HVM);
+}
+
+struct arch_vpmu_struct core_vpmu = {
+    .do_wrmsr = core_vpmu_do_wrmsr,
+    .do_rdmsr = core_vpmu_do_rdmsr,
+    .do_interrupt = core_vpmu_do_interrupt,
+    .arch_vpmu_initialise = core_vpmu_initialise,
+    .arch_vpmu_destroy = core_vpmu_destroy,
+    .arch_vpmu_save = core_vpmu_save,
+    .arch_vpmu_load = core_vpmu_load
+};
+
diff -r 8f0cbfc478d6 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/i8259.c	Thu Dec 13 23:47:33 2007 +0800
@@ -72,6 +72,7 @@ BUILD_SMP_INTERRUPT(apic_timer_interrupt
 BUILD_SMP_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+BUILD_SMP_INTERRUPT(pmu_apic_interrupt,PMU_APIC_VECTOR)
 BUILD_SMP_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 
 #define IRQ(x,y) \
diff -r 8f0cbfc478d6 xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/oprofile/nmi_int.c	Thu Dec 13 23:47:33 2007 +0800
@@ -291,18 +291,27 @@ static int __init p4_init(char ** cpu_ty
 }
 
 
+extern int ppro_has_global_ctrl;
 static int __init ppro_init(char ** cpu_type)
 {
 	__u8 cpu_model = current_cpu_data.x86_model;
 
-	if (cpu_model > 15) {
+	if (cpu_model > 15 && cpu_model != 23) {
 		printk("xenoprof: Initialization failed. "
 		       "Intel processor model %d for P6 class family is not "
 		       "supported\n", cpu_model);
 		return 0;
 	}
+	else if (cpu_model == 23)
+    {
+        ppro_has_global_ctrl = 1;
+		*cpu_type = "i386/core_2";
+    }
 	else if (cpu_model == 15)
+    {
+        ppro_has_global_ctrl = 1;
 		*cpu_type = "i386/core_2";
+    }
 	else if (cpu_model == 14)
 		*cpu_type = "i386/core";
 	else if (cpu_model == 9)
diff -r 8f0cbfc478d6 xen/arch/x86/oprofile/op_model_ppro.c
--- a/xen/arch/x86/oprofile/op_model_ppro.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/oprofile/op_model_ppro.c	Thu Dec 13 23:47:33 2007 +0800
@@ -41,6 +41,7 @@
 #define CTRL_SET_EVENT(val, e) (val |= e)
 
 static unsigned long reset_value[NUM_COUNTERS];
+int ppro_has_global_ctrl = 0;
  
 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 {
@@ -126,6 +127,11 @@ static void ppro_start(struct op_msrs co
 	CTRL_READ(low, high, msrs, 0);
 	CTRL_SET_ACTIVE(low);
 	CTRL_WRITE(low, high, msrs, 0);
+    /* Global Control MSR is enabled by default when system power on.
+     * However, this may not hold true when xenoprof starts to run.
+     */
+    if ( ppro_has_global_ctrl )
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 1);
 }
 
 
@@ -135,6 +141,8 @@ static void ppro_stop(struct op_msrs con
 	CTRL_READ(low, high, msrs, 0);
 	CTRL_SET_INACTIVE(low);
 	CTRL_WRITE(low, high, msrs, 0);
+    if ( ppro_has_global_ctrl )
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 }
 
 
diff -r 8f0cbfc478d6 xen/common/xenoprof.c
--- a/xen/common/xenoprof.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/common/xenoprof.c	Thu Dec 13 23:47:33 2007 +0800
@@ -22,6 +22,10 @@
 /* Lock protecting the following global state */
 static DEFINE_SPINLOCK(xenoprof_lock);
 
+static DEFINE_SPINLOCK(pmu_owner_lock);
+int pmu_owner = 0;
+int pmu_hvm_refcount = 0;
+
 static struct domain *active_domains[MAX_OPROF_DOMAINS];
 static int active_ready[MAX_OPROF_DOMAINS];
 static unsigned int adomains;
@@ -42,6 +46,37 @@ static u64 passive_samples;
 static u64 passive_samples;
 static u64 idle_samples;
 static u64 others_samples;
+
+int acquire_pmu_ownership(int pmu_ownship)
+{
+    spin_lock(&pmu_owner_lock);
+    if ( pmu_owner == PMU_OWNER_NONE )
+    {
+        pmu_owner = pmu_ownship;
+        goto out;
+    }
+
+    if ( pmu_owner == pmu_ownship )
+        goto out;
+
+    spin_unlock(&pmu_owner_lock);
+    return 0;
+ out:
+    if ( pmu_owner == PMU_OWNER_HVM )
+        pmu_hvm_refcount++;
+    spin_unlock(&pmu_owner_lock);
+    return 1;
+}
+
+void release_pmu_ownship(int pmu_ownship)
+{
+    spin_lock(&pmu_owner_lock);
+    if ( pmu_ownship == PMU_OWNER_HVM )
+        pmu_hvm_refcount--;
+    if ( !pmu_hvm_refcount )
+        pmu_owner = PMU_OWNER_NONE;
+    spin_unlock(&pmu_owner_lock);
+}
 
 int is_active(struct domain *d)
 {
@@ -648,6 +683,11 @@ int do_xenoprof_op(int op, XEN_GUEST_HAN
         break;
 
     case XENOPROF_get_buffer:
+        if ( !acquire_pmu_ownership(PMU_OWNER_XENOPROF) )
+        {
+            ret = -EBUSY;
+            break;
+        }
         ret = xenoprof_op_get_buffer(arg);
         break;
 
@@ -769,6 +809,7 @@ int do_xenoprof_op(int op, XEN_GUEST_HAN
             break;
         x = current->domain->xenoprof;
         unshare_xenoprof_page_with_guest(x);
+        release_pmu_ownship(PMU_OWNER_XENOPROF);
         break;
     }
 
diff -r 8f0cbfc478d6 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/asm-x86/hvm/hvm.h	Thu Dec 13 23:47:33 2007 +0800
@@ -124,6 +124,7 @@ struct hvm_function_table {
     void (*init_hypercall_page)(struct domain *d, void *hypercall_page);
 
     int  (*event_pending)(struct vcpu *v);
+    int  (*do_pmu_interrupt)(struct cpu_user_regs *regs);
 
     int  (*cpu_up)(void);
     void (*cpu_down)(void);
@@ -246,6 +247,11 @@ static inline int hvm_event_pending(stru
 static inline int hvm_event_pending(struct vcpu *v)
 {
     return hvm_funcs.event_pending(v);
+}
+
+static inline int hvm_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return hvm_funcs.do_pmu_interrupt(regs);
 }
 
 /* These reserved bits in lower 32 remain 0 after any load of CR0 */
diff -r 8f0cbfc478d6 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/asm-x86/hvm/vlapic.h	Fri Dec 14 00:03:54 2007 +0800
@@ -71,6 +71,12 @@ static inline void vlapic_set_reg(
     *((uint32_t *)(&vlapic->regs->data[reg])) = val;
 }
 
+static inline int is_vlapic_lvtpc_enabled(struct vlapic *vlapic)
+{
+    return vlapic_enabled(vlapic) &&
+           !(vlapic_get_reg(vlapic, APIC_LVTPC) & APIC_LVT_MASKED);
+}
+
 int vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig);
 
 int vlapic_find_highest_irr(struct vlapic *vlapic);
@@ -92,4 +98,5 @@ struct vlapic *apic_round_robin(
 
 int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda);
 
+
 #endif /* __ASM_X86_HVM_VLAPIC_H__ */
diff -r 8f0cbfc478d6 xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Thu Dec 13 23:47:33 2007 +0800
@@ -22,6 +22,7 @@
 #include <asm/config.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/vmx/cpu.h>
+#include <asm/hvm/vmx/vpmu.h>
 
 #ifdef VMXASSIST
 #include <public/hvm/vmx_assist.h>
@@ -75,6 +76,9 @@ struct arch_vmx_struct {
 
     /* Cache of cpu execution control. */
     u32                  exec_control;
+
+    /* PMU */
+    struct vpmu_struct   vpmu;
 
 #ifdef __x86_64__
     struct vmx_msr_state msr_state;
diff -r 8f0cbfc478d6 xen/include/asm-x86/hvm/vmx/vpmu.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vpmu.h	Thu Dec 13 23:47:33 2007 +0800
@@ -0,0 +1,83 @@
+/*
+ * pmu.h: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#ifndef __ASM_X86_HVM_VPMU_H_
+#define __ASM_X86_HVM_VPMU_H_
+
+#define msraddr_to_bitpos(x) (((x)&0xffff) + ((x)>>31)*0x2000)
+#define vcpu_vpmu(vcpu)   (&(vcpu)->arch.hvm_vcpu.u.vmx.vpmu)
+#define vpmu_vcpu(vpmu)   (container_of((vpmu), struct vcpu, \
+                                          arch.hvm_vcpu.u.vmx.vpmu))
+#define vpmu_domain(vpmu) (vpmu_vcpu(vpmu)->domain)
+
+#define MSR_TYPE_COUNTER            0
+#define MSR_TYPE_CTRL               1
+#define MSR_TYPE_GLOBAL             2
+#define MSR_TYPE_ARCH_COUNTER       3
+#define MSR_TYPE_ARCH_CTRL          4
+
+#define LVTPC_HVM_PMU            0xf8
+
+struct pmumsr {
+    unsigned int num;
+    u32 *msr;
+};
+
+struct msr_load_store_entry {
+    u32 msr_index;
+    u32 msr_reserved;
+    u64 msr_data;
+};
+
+/* Arch specific definitions shared by all vpmus */
+struct arch_vpmu_struct {
+    int (*do_wrmsr)(struct cpu_user_regs *regs);
+    int (*do_rdmsr)(struct cpu_user_regs *regs);
+    int (*do_interrupt)(struct cpu_user_regs *regs);
+    void (*arch_vpmu_initialise)(struct vcpu *v);
+    void (*arch_vpmu_destroy)(struct vcpu *v);
+    void (*arch_vpmu_save)(struct vcpu *v);
+    void (*arch_vpmu_load)(struct vcpu *v);
+};
+
+struct vpmu_struct {
+    u32 flags;
+    void *context;
+    struct arch_vpmu_struct *arch_vpmu;
+};
+
+#define VPMU_CONTEXT_ALLOCATED              0x1
+#define VPMU_CONTEXT_LOADED                 0x2
+#define VPMU_RUNNING                        0x4
+
+int inline vpmu_do_wrmsr(struct cpu_user_regs *regs);
+int inline vpmu_do_rdmsr(struct cpu_user_regs *regs);
+int inline vpmu_do_interrupt(struct cpu_user_regs *regs);
+void inline vpmu_initialise(struct vcpu *v);
+void inline vpmu_destroy(struct vcpu *v);
+void inline vpmu_save(void *info);
+void inline vpmu_load(struct vcpu *v);
+
+extern int acquire_pmu_ownership(int pmu_ownership);
+extern void release_pmu_ownership(int pmu_ownership);
+
+#endif /* __ASM_X86_HVM_VPMU_H_*/
+
diff -r 8f0cbfc478d6 xen/include/asm-x86/hvm/vmx/vpmu_core.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vpmu_core.h	Thu Dec 13 23:47:33 2007 +0800
@@ -0,0 +1,74 @@
+
+/*
+ * vpmu_core.c: CORE Arch specific PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#ifndef __ASM_X86_HVM_VPMU_CORE_H_
+#define __ASM_X86_HVM_VPMU_CORE_H_
+
+/* Core 2 Non-architectual Performance Counter MSRs. */
+u32 core_counters_msr[] =   {
+    MSR_CORE_PERF_FIXED_CTR0,
+    MSR_CORE_PERF_FIXED_CTR1,
+    MSR_CORE_PERF_FIXED_CTR2};
+
+/* Core 2 Non-architectual Performance Control MSRs. */
+u32 core_ctrls_msr[] = {
+    MSR_CORE_PERF_FIXED_CTR_CTRL,
+    MSR_IA32_PEBS_ENABLE,
+    MSR_IA32_DS_AREA};
+
+/* Core 2 global control MSRs. */
+u32 core_global_msr[] = {
+    MSR_CORE_PERF_GLOBAL_CTRL,
+    MSR_CORE_PERF_GLOBAL_STATUS,
+    MSR_CORE_PERF_GLOBAL_OVF_CTRL};
+
+struct pmumsr core_counters = {
+    3,
+    core_counters_msr
+};
+
+struct pmumsr core_ctrls = {
+    3,
+    core_ctrls_msr
+};
+
+struct arch_msr_pair {
+    u64 counter;
+    u64 control;
+};
+
+struct core_pmu_enable {
+    char fixed_ctr_enable[3];
+    char arch_pmc_enable[1];
+};
+
+struct core_vpmu_context {
+    struct core_pmu_enable *pmu_enable;
+    u64 counters[3];
+    u64 ctrls[3];
+    u64 global_ovf_status;
+    u32 hw_lapic_lvtpc;
+    struct arch_msr_pair arch_msr_pair[1];
+};
+
+#endif /* __ASM_X86_HVM_VPMU_CORE_H_ */
+
diff -r 8f0cbfc478d6 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/asm-x86/irq.h	Thu Dec 13 23:47:33 2007 +0800
@@ -28,6 +28,7 @@ fastcall void call_function_interrupt(vo
 fastcall void call_function_interrupt(void);
 fastcall void apic_timer_interrupt(void);
 fastcall void error_interrupt(void);
+fastcall void pmu_apic_interrupt(void);
 fastcall void spurious_interrupt(void);
 fastcall void thermal_interrupt(void);
 
diff -r 8f0cbfc478d6 xen/include/asm-x86/mach-default/irq_vectors.h
--- a/xen/include/asm-x86/mach-default/irq_vectors.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/asm-x86/mach-default/irq_vectors.h	Thu Dec 13 23:47:33 2007 +0800
@@ -9,13 +9,14 @@
 #define CALL_FUNCTION_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR	0xfa
 #define LOCAL_TIMER_VECTOR	0xf9
+#define PMU_APIC_VECTOR		0xf8
 
 /*
  * High-priority dynamically-allocated vectors. For interrupts that
  * must be higher priority than any guest-bound interrupt.
  */
 #define FIRST_HIPRIORITY_VECTOR	0xf0
-#define LAST_HIPRIORITY_VECTOR  0xf8
+#define LAST_HIPRIORITY_VECTOR  0xf7
 
 /* Legacy PIC uses vectors 0xe0-0xef. */
 #define FIRST_LEGACY_VECTOR	0xe0
diff -r 8f0cbfc478d6 xen/include/xen/xenoprof.h
--- a/xen/include/xen/xenoprof.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/xen/xenoprof.h	Thu Dec 13 23:47:33 2007 +0800
@@ -69,4 +69,10 @@ int xenoprof_add_trace(struct domain *d,
 int xenoprof_add_trace(struct domain *d, struct vcpu *v, 
                        unsigned long eip, int mode);
 
+#define PMU_OWNER_NONE          0
+#define PMU_OWNER_XENOPROF      1
+#define PMU_OWNER_HVM           2
+int acquire_pmu_ownship(int pmu_ownership);
+void release_pmu_ownship(int pmu_ownership);
+
 #endif  /* __XEN__XENOPROF_H__ */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: Re: [PATCH] Enable Core 2 Duo Performance Counters inHVM guest
  2007-12-14  7:49       ` Re: [PATCH] Enable Core 2 Duo Performance Counters inHVM guest Shan, Haitao
@ 2007-12-14  8:31         ` Keir Fraser
  2007-12-14  9:32           ` Shan, Haitao
  0 siblings, 1 reply; 14+ messages in thread
From: Keir Fraser @ 2007-12-14  8:31 UTC (permalink / raw)
  To: Shan, Haitao; +Cc: xen-devel, Jiang, Yunhong


[-- Attachment #1.1: Type: text/plain, Size: 4627 bytes --]

The code style is still odd, but that’s easily fixed. Coding style in files
derived from Linux is often Linux style (including hard tabs). Otherwise
look in e.g., common/domain.c for Xen coding style.

I don’t understand why the MSR-write handler is so complicated, and tracks a
bunch of stuff, yet you appear to give direct access to all the MSRs by
clearing bits in the MSR bitmap. Is the MSR-write function more complicated
than it needs to be? Or is direct MSR access by the guest unsafe?

Is the virtualisation for Core, or Core 2, or both?

I don’t think you need to statically allocate a PMU_VECTOR. request_irq()
yourself a vector when VMX is initialised at boot time. This will avoid
touching a bunch of generic files.

 -- Keir

On 14/12/07 07:49, "Shan, Haitao" <haitao.shan@intel.com> wrote:

> Hi, Keir,
>  
> Thanks for your detailed comments. I have worked out an updated patch.
>  
> I removed my own MSR macros definitions and hard TAB indentation. Also, I
> removed LVTPC write-through in vlapic_write. Now, only when guest both enables
> counting and interrupt, physical LVTPC is written. And when vcpu is scheduled
> out, LVTPC is masked.
> In addition, I employed a "first come, first service" policy to grant PMU
> access to xenoprof/hvm_guest. When access is granted to hvm guest, oprofile
> will get "Device Busy" . On the contrary, guest can not use PMU like before.
> HVM save/restore is tested, the patch will not break current code.
>  
> Can you have a look and give me your comments? Thanks in advance!
> Best Regards 
> Haitao Shan 
> 
>  
> 
> 
> From: xen-devel-bounces@lists.xensource.com
> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Keir Fraser
> Sent: 2007年12月11日 23:02
> To: Shan, Haitao
> Cc: xen-devel@lists.xensource.com; Jiang, Yunhong
> Subject: [Xen-devel] Re: [PATCH] Enable Core 2 Duo Performance Counters inHVM
> guest
> 
> Oh yes, I see you save/restore MSR state and LVTPC across context switch.
> That’s fine then. But I don’t think you should pass through any of the virtual
> LVTPC register fields at all, except to toggle the mask field of the real
> LVTPC depending on whether or not performance counter interrupt delivery is
> currently enabled for the VCPU (I’d implement this just to avoid unnecessary
> real interrupts which are certainly not going to turn into virtual
> interrupts). There are no real LVTPC fields which should really be under guest
> control, and so cooking the guest LVTPC value and poking it into the real
> LVTPC in the vlapic code looks odd. As does save/restore of the whole LVTPC on
> context switch -- at most you should need to track masked/not-masked. And
> you’re letting the guest have access to reserved bits of the real LVTPC, which
> is not good...
> 
> Ignore my comment about save/restore ― I misread your context-switching code
> as HVM save/restore code!
> 
>  -- Keir
> 
> On 11/12/07 13:32, "Shan, Haitao" <haitao.shan@intel.com> wrote:
> 
>> * Poking the LVTPC as a one-off event only when the  VCPU writes its virtual
>> LVTPC is not going to fly. What if the VCPU is  subsequently migrated to a
>> different physical CPU? In any case the LVTPC is a  shared resource that
>> needs synchronised access via e.g., context switching or  mutual exclusion
>> (see the next point below).
>> I think in  the patch I will save/load LVTPC during context switch.
>> 
>>  *  How does this interact with, for example, xenoprof? Ideally we should be
>> able  to run xenoprof and this concurrently over disjoint subsets of domains.
>> At the  very least xenoprof and this patch should avoid stomping on each
>> other by  implementing some form of mutual exclusion. Probably some resource
>> sharing  (e.g., the PMC interrupt vector) also is possible. I expect this
>> will require  some design thought, but unfortunately that is the price for
>> being second into  the tree.
>> Yes. How to share PMU resources will need  careful design. But I really don’t
>> think it possible to run both concurrently  unless we do some sort of PMC’s
>> partition. Of course, it is silly of me not to  implement a mechanism for
>> mutual exclusion. I will implement one.
>> 
>>  * Impact on save/restore to/from Core-2 processors: has  this been tested at
>> all?
>> I will try doing the test. I  think whether it should support save/restore
>> can be argued. Does anyone want  to run VTune/Oprofile during save/restore?
>> This can be hardly a good usage  model. But at least, I update it to ensure
>> it does not break current  save/restore.
> 
> 



[-- Attachment #1.2: Type: text/html, Size: 6316 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: Re: [PATCH] Enable Core 2 Duo Performance Counters inHVM guest
  2007-12-14  8:31         ` Keir Fraser
@ 2007-12-14  9:32           ` Shan, Haitao
  2007-12-14  9:54             ` Keir Fraser
  0 siblings, 1 reply; 14+ messages in thread
From: Shan, Haitao @ 2007-12-14  9:32 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, Jiang, Yunhong

[-- Attachment #1.1: Type: text/plain, Size: 6272 bytes --]

Thanks for your quick reply. I will fully check my coding style, sorry for that.
Please see my comments embedded, thanks.

Best Regards 
Haitao Shan 

________________________________

From: Keir Fraser [mailto:Keir.Fraser@cl.cam.ac.uk] 
Sent: 2007年12月14日 16:31
To: Shan, Haitao
Cc: xen-devel@lists.xensource.com; Jiang, Yunhong
Subject: Re: [Xen-devel] Re: [PATCH] Enable Core 2 Duo Performance Counters inHVM guest

The code style is still odd, but that’s easily fixed. Coding style in files derived from Linux is often Linux style (including hard tabs). Otherwise look in e.g., common/domain.c for Xen coding style. 

I don’t understand why the MSR-write handler is so complicated, and tracks a bunch of stuff, yet you appear to give direct access to all the MSRs by clearing bits in the MSR bitmap. Is the MSR-write function more complicated than it needs to be? Or is direct MSR access by the guest unsafe? 
Guest can directly access counter MSRs and read-access non-global-control MSRs. So guest write to control MSRs are always trapped. The reason is:
We want to implement a "lazy load" mechanism. But only when none of the counters are running, should "lazy load" be employed. If counters are running, we must load its contents every time the vcpu is scheduled back to run. Currently each counter has two control MSRs: a single bit in global control and a dedicate MSR. Those code will calculate which one is enabled. The result can be used to do the "lazy load" staff.
 Another approach is calculate the running status of all the counters at context switching time. But from my observation, vcpu migration tends to be quite frequent. However, the times that guest writes to control MSRs are much less. So I decide it is better to do it in msr write handler.

Is the virtualisation for Core, or Core 2, or both? 
Only Core 2. The reason is that Core do not have global control MSR. This MSR is the only one which will use VMX's HW capability to save and load on vmentry/vmexit. The benefit is the all the other MSRs can be handled with software flexibility, like the "lazy load" mechanism.

I don’t think you need to statically allocate a PMU_VECTOR. request_irq() yourself a vector when VMX is initialised at boot time. This will avoid touching a bunch of generic files. 
But request_irq can not ensure PMU can be assigned with a high vector. High vector will help to handle PMIs in time so that gain accurate performance data. 

 -- Keir

On 14/12/07 07:49, "Shan, Haitao" <haitao.shan@intel.com> wrote:

	Hi, Keir,

	Thanks for your detailed comments. I have worked out an updated patch.

	I removed my own MSR macros definitions and hard TAB indentation. Also, I removed LVTPC write-through in vlapic_write. Now, only when guest both enables counting and interrupt, physical LVTPC is written. And when vcpu is scheduled out, LVTPC is masked.
	In addition, I employed a "first come, first service" policy to grant PMU access to xenoprof/hvm_guest. When access is granted to hvm guest, oprofile will get "Device Busy" . On the contrary, guest can not use PMU like before.
	HVM save/restore is tested, the patch will not break current code.

	Can you have a look and give me your comments? Thanks in advance!
	Best Regards 
	Haitao Shan 

________________________________

	From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Keir Fraser
	Sent: 2007年12月11日 23:02
	To: Shan, Haitao
	Cc: xen-devel@lists.xensource.com; Jiang, Yunhong
	Subject: [Xen-devel] Re: [PATCH] Enable Core 2 Duo Performance Counters inHVM guest

	Oh yes, I see you save/restore MSR state and LVTPC across context switch. That’s fine then. But I don’t think you should pass through any of the virtual LVTPC register fields at all, except to toggle the mask field of the real LVTPC depending on whether or not performance counter interrupt delivery is currently enabled for the VCPU (I’d implement this just to avoid unnecessary real interrupts which are certainly not going to turn into virtual interrupts). There are no real LVTPC fields which should really be under guest control, and so cooking the guest LVTPC value and poking it into the real LVTPC in the vlapic code looks odd. As does save/restore of the whole LVTPC on context switch -- at most you should need to track masked/not-masked. And you’re letting the guest have access to reserved bits of the real LVTPC, which is not good...

	Ignore my comment about save/restore ― I misread your context-switching code as HVM save/restore code!

	 -- Keir

	On 11/12/07 13:32, "Shan, Haitao" <haitao.shan@intel.com> wrote:

		* Poking the LVTPC as a one-off event only when the  VCPU writes its virtual LVTPC is not going to fly. What if the VCPU is  subsequently migrated to a different physical CPU? In any case the LVTPC is a  shared resource that needs synchronised access via e.g., context switching or  mutual exclusion (see the next point below).
		I think in  the patch I will save/load LVTPC during context switch. 

		 *  How does this interact with, for example, xenoprof? Ideally we should be able  to run xenoprof and this concurrently over disjoint subsets of domains. At the  very least xenoprof and this patch should avoid stomping on each other by  implementing some form of mutual exclusion. Probably some resource sharing  (e.g., the PMC interrupt vector) also is possible. I expect this will require  some design thought, but unfortunately that is the price for being second into  the tree.
		Yes. How to share PMU resources will need  careful design. But I really don’t think it possible to run both concurrently  unless we do some sort of PMC’s partition. Of course, it is silly of me not to  implement a mechanism for mutual exclusion. I will implement one.  

		 * Impact on save/restore to/from Core-2 processors: has  this been tested at all?
		I will try doing the test. I  think whether it should support save/restore can be argued. Does anyone want  to run VTune/Oprofile during save/restore? This can be hardly a good usage  model. But at least, I update it to ensure it does not break current  save/restore.

[-- Attachment #1.2: Type: text/html, Size: 10754 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: Re: [PATCH] Enable Core 2 Duo Performance Counters inHVM guest
  2007-12-14  9:32           ` Shan, Haitao
@ 2007-12-14  9:54             ` Keir Fraser
  2007-12-14 10:01               ` Re: [PATCH] Enable Core 2 Duo PerformanceCounters " Shan, Haitao
  2007-12-14 15:43               ` Shan, Haitao
  0 siblings, 2 replies; 14+ messages in thread
From: Keir Fraser @ 2007-12-14  9:54 UTC (permalink / raw)
  To: Shan, Haitao; +Cc: xen-devel, Jiang, Yunhong


[-- Attachment #1.1: Type: text/plain, Size: 963 bytes --]

1. Shouldn¹t the family-specific file be called vpmu_core2.c then?

2. Ah yes, actually setup_irq()/request_irq() is not a suitable interface.
The current code is fine.

 -- Keir

On 14/12/07 09:32, "Shan, Haitao" <haitao.shan@intel.com> wrote:

> Is the virtualisation for Core, or Core 2, or both?
> Only Core 2. The reason is that Core do not have global control MSR. This MSR
> is the only one which will use VMX's HW capability to save and load on
> vmentry/vmexit. The benefit is the all the other MSRs can be handled with
> software flexibility, like the "lazy load" mechanism.
> 
> I don¹t think you need to statically allocate a PMU_VECTOR. request_irq()
> yourself a vector when VMX is initialised at boot time. This will avoid
> touching a bunch of generic files.
> But request_irq can not ensure PMU can be assigned with a high vector. High
> vector will help to handle PMIs in time so that gain accurate performance
> data. 



[-- Attachment #1.2: Type: text/html, Size: 1477 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: Re: [PATCH] Enable Core 2 Duo PerformanceCounters inHVM guest
  2007-12-14  9:54             ` Keir Fraser
@ 2007-12-14 10:01               ` Shan, Haitao
  2007-12-14 15:43               ` Shan, Haitao
  1 sibling, 0 replies; 14+ messages in thread
From: Shan, Haitao @ 2007-12-14 10:01 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, Jiang, Yunhong


[-- Attachment #1.1: Type: text/plain, Size: 1453 bytes --]

Thanks, Keir!
I will rename the file. Actually I mean the Core Architecture here. But it will definitly cause confusions.

Best Regards 
Haitao Shan 

 

________________________________

From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Keir Fraser
Sent: 2007年12月14日 17:55
To: Shan, Haitao
Cc: xen-devel@lists.xensource.com; Jiang, Yunhong
Subject: Re: [Xen-devel] Re: [PATCH] Enable Core 2 Duo PerformanceCounters inHVM guest


1. Shouldn’t the family-specific file be called vpmu_core2.c then?

2. Ah yes, actually setup_irq()/request_irq() is not a suitable interface. The current code is fine.

 -- Keir

On 14/12/07 09:32, "Shan, Haitao" <haitao.shan@intel.com> wrote:



	Is the virtualisation for Core, or Core 2, or both? 
	Only Core 2. The reason is that Core do not have global control MSR. This MSR is the only one which will use VMX's HW capability to save and load on vmentry/vmexit. The benefit is the all the other MSRs can be handled with software flexibility, like the "lazy load" mechanism.
	
	I don’t think you need to statically allocate a PMU_VECTOR. request_irq() yourself a vector when VMX is initialised at boot time. This will avoid touching a bunch of generic files. 
	But request_irq can not ensure PMU can be assigned with a high vector. High vector will help to handle PMIs in time so that gain accurate performance data. 
	




[-- Attachment #1.2: Type: text/html, Size: 2743 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: Re: [PATCH] Enable Core 2 Duo PerformanceCounters inHVM guest
  2007-12-14  9:54             ` Keir Fraser
  2007-12-14 10:01               ` Re: [PATCH] Enable Core 2 Duo PerformanceCounters " Shan, Haitao
@ 2007-12-14 15:43               ` Shan, Haitao
  2007-12-14 19:05                 ` Otavio Salvador
  1 sibling, 1 reply; 14+ messages in thread
From: Shan, Haitao @ 2007-12-14 15:43 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, Jiang, Yunhong


[-- Attachment #1.1: Type: text/plain, Size: 1524 bytes --]

Hi, Keir,

 

I have carefully checked my coding style according to your comments. 

For those which contained hard TABs originally, I only updated what was touched by my patch.

Do you have any comments?

 

Thanks!

 

Haitao Shan

 

________________________________

From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Keir Fraser
Sent: 2007年12月14日 17:55
To: Shan, Haitao
Cc: xen-devel@lists.xensource.com; Jiang, Yunhong
Subject: Re: [Xen-devel] Re: [PATCH] Enable Core 2 Duo PerformanceCounters inHVM guest

 

1. Shouldn’t the family-specific file be called vpmu_core2.c then?

2. Ah yes, actually setup_irq()/request_irq() is not a suitable interface. The current code is fine.

 -- Keir

On 14/12/07 09:32, "Shan, Haitao" <haitao.shan@intel.com> wrote:

Is the virtualisation for Core, or Core 2, or both? 
Only Core 2. The reason is that Core do not have global control MSR. This MSR is the only one which will use VMX's HW capability to save and load on vmentry/vmexit. The benefit is the all the other MSRs can be handled with software flexibility, like the "lazy load" mechanism.

I don’t think you need to statically allocate a PMU_VECTOR. request_irq() yourself a vector when VMX is initialised at boot time. This will avoid touching a bunch of generic files. 
But request_irq can not ensure PMU can be assigned with a high vector. High vector will help to handle PMIs in time so that gain accurate performance data. 

 


[-- Attachment #1.2: Type: text/html, Size: 6449 bytes --]

[-- Attachment #2: pmu.patch --]
[-- Type: application/octet-stream, Size: 38605 bytes --]

diff -r 8f0cbfc478d6 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/apic.c	Fri Dec 14 07:21:43 2007 +0800
@@ -93,6 +93,9 @@ void __init apic_intr_init(void)
     /* IPI vectors for APIC spurious and error interrupts */
     set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
     set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+    /* Performance Counters Interrupt */
+    set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt);
 
     /* thermal monitor LVT interrupt */
 #ifdef CONFIG_X86_MCE_P4THERMAL
@@ -1227,6 +1230,16 @@ fastcall void smp_error_interrupt(struct
 }
 
 /*
+ * This interrupt handles performance counters interrupt
+ */
+
+fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs)
+{
+    ack_APIC_irq();
+    hvm_do_pmu_interrupt(regs);
+}
+
+/*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
diff -r 8f0cbfc478d6 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c	Fri Dec 14 07:21:43 2007 +0800
@@ -867,6 +867,11 @@ static int svm_event_pending(struct vcpu
     return vmcb->eventinj.fields.v;
 }
 
+static int svm_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return 0;
+}
+
 static struct hvm_function_table svm_function_table = {
     .name                 = "SVM",
     .cpu_down             = svm_cpu_down,
@@ -890,7 +895,8 @@ static struct hvm_function_table svm_fun
     .set_tsc_offset       = svm_set_tsc_offset,
     .inject_exception     = svm_inject_exception,
     .init_hypercall_page  = svm_init_hypercall_page,
-    .event_pending        = svm_event_pending
+    .event_pending        = svm_event_pending,
+    .do_pmu_interrupt     = svm_do_pmu_interrupt
 };
 
 int start_svm(struct cpuinfo_x86 *c)
diff -r 8f0cbfc478d6 xen/arch/x86/hvm/vmx/Makefile
--- a/xen/arch/x86/hvm/vmx/Makefile	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/Makefile	Fri Dec 14 07:36:51 2007 +0800
@@ -9,3 +9,5 @@ endif
 endif
 obj-y += vmcs.o
 obj-y += vmx.o
+obj-y += vpmu.o
+obj-y += vpmu_core2.o
diff -r 8f0cbfc478d6 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Fri Dec 14 07:21:43 2007 +0800
@@ -90,6 +90,8 @@ static int vmx_vcpu_initialise(struct vc
         return rc;
     }
 
+    vpmu_initialise(v);
+
     vmx_install_vlapic_mapping(v);
 
 #ifndef VMXASSIST
@@ -104,6 +106,7 @@ static void vmx_vcpu_destroy(struct vcpu
 static void vmx_vcpu_destroy(struct vcpu *v)
 {
     vmx_destroy_vmcs(v);
+    vpmu_destroy(v);
 }
 
 #ifdef __x86_64__
@@ -742,6 +745,7 @@ static void vmx_ctxt_switch_from(struct 
     vmx_save_guest_msrs(v);
     vmx_restore_host_msrs();
     vmx_save_dr(v);
+    vpmu_save(v);
 }
 
 static void vmx_ctxt_switch_to(struct vcpu *v)
@@ -752,6 +756,7 @@ static void vmx_ctxt_switch_to(struct vc
 
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
+    vpmu_load(v);
 }
 
 static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg)
@@ -1129,6 +1134,11 @@ static int vmx_event_pending(struct vcpu
 {
     ASSERT(v == current);
     return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
+}
+
+static int vmx_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return vpmu_do_interrupt(regs);
 }
 
 static struct hvm_function_table vmx_function_table = {
@@ -1154,6 +1164,7 @@ static struct hvm_function_table vmx_fun
     .inject_exception     = vmx_inject_exception,
     .init_hypercall_page  = vmx_init_hypercall_page,
     .event_pending        = vmx_event_pending,
+    .do_pmu_interrupt     = vmx_do_pmu_interrupt,
     .cpu_up               = vmx_cpu_up,
     .cpu_down             = vmx_cpu_down,
 };
@@ -1312,7 +1323,6 @@ void vmx_cpuid_intercept(
 
     case 0x00000006:
     case 0x00000009:
-    case 0x0000000A:
         *eax = *ebx = *ecx = *edx = 0;
         break;
 
@@ -2395,7 +2405,15 @@ static int vmx_do_msr_read(struct cpu_us
         /* No point in letting the guest see real MCEs */
         msr_content = 0;
         break;
+    case MSR_IA32_MISC_ENABLE:
+        rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
+        /* Debug Trace Store is not supported. */
+        msr_content |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
+                       MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
+        break;
     default:
+        if ( vpmu_do_rdmsr(regs) )
+            goto done;
         switch ( long_mode_do_msr_read(regs) )
         {
             case HNDL_unhandled:
@@ -2602,6 +2620,8 @@ static int vmx_do_msr_write(struct cpu_u
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
+        if ( vpmu_do_wrmsr(regs) )
+            return 1;
         switch ( long_mode_do_msr_write(regs) )
         {
             case HNDL_unhandled:
@@ -2651,6 +2671,7 @@ static void vmx_do_extint(struct cpu_use
     fastcall void smp_call_function_interrupt(void);
     fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
     fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
+    fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs);
 #ifdef CONFIG_X86_MCE_P4THERMAL
     fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
 #endif
@@ -2680,6 +2701,9 @@ static void vmx_do_extint(struct cpu_use
         break;
     case ERROR_APIC_VECTOR:
         smp_error_interrupt(regs);
+        break;
+    case PMU_APIC_VECTOR:
+        smp_pmu_apic_interrupt(regs);
         break;
 #ifdef CONFIG_X86_MCE_P4THERMAL
     case THERMAL_APIC_VECTOR:
diff -r 8f0cbfc478d6 xen/arch/x86/hvm/vmx/vpmu.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vpmu.c	Fri Dec 14 07:29:30 2007 +0800
@@ -0,0 +1,119 @@
+/*
+ * vpmu.c: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <public/sched.h>
+#include <public/hvm/save.h>
+#include <asm/hvm/vmx/vpmu.h>
+
+int inline vpmu_do_wrmsr(struct cpu_user_regs *regs)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu_ops )
+        return vpmu->arch_vpmu_ops->do_wrmsr(regs);
+    return 0;
+}
+
+int inline vpmu_do_rdmsr(struct cpu_user_regs *regs)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu_ops )
+        return vpmu->arch_vpmu_ops->do_rdmsr(regs);
+    return 0;
+}
+
+int inline vpmu_do_interrupt(struct cpu_user_regs *regs)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu_ops )
+        return vpmu->arch_vpmu_ops->do_interrupt(regs);
+    return 0;
+}
+
+void vpmu_save(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu_ops )
+        vpmu->arch_vpmu_ops->arch_vpmu_save(v);
+}
+
+void vpmu_load(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu_ops )
+        vpmu->arch_vpmu_ops->arch_vpmu_load(v);
+}
+
+extern struct arch_vpmu_ops core2_vpmu_ops;
+void inline vpmu_initialise(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    /* If it is not a fresh initialization, release all resources
+     * before initialise again.
+     */
+    if ( vpmu->flags & VPMU_CONTEXT_ALLOCATED )
+        vpmu_destroy(v);
+
+    if ( current_cpu_data.x86 == 6 )
+    {
+        switch ( current_cpu_data.x86_model )
+        {
+        case 15:
+        case 23:
+            vpmu->arch_vpmu_ops = &core2_vpmu_ops;
+            dprintk(XENLOG_INFO,
+                   "Core 2 duo CPU detected for guest PMU usage.\n");
+            break;
+        }
+    }
+
+    if ( !vpmu->arch_vpmu_ops )
+    {
+        dprintk(XENLOG_WARNING, "Unsupport CPU model for guest PMU usage.\n");
+        return;
+    }
+
+    vpmu->flags = 0;
+    vpmu->context = NULL;
+    vpmu->arch_vpmu_ops->arch_vpmu_initialise(v);
+}
+
+void inline vpmu_destroy(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu_ops )
+        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
+}
+
diff -r 8f0cbfc478d6 xen/arch/x86/hvm/vmx/vpmu_core2.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c	Fri Dec 14 08:36:46 2007 +0800
@@ -0,0 +1,490 @@
+/*
+ * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/system.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/msr-index.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vlapic.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <public/sched.h>
+#include <public/hvm/save.h>
+#include <asm/hvm/vmx/vpmu.h>
+#include <asm/hvm/vmx/vpmu_core2.h>
+
+static int arch_pmc_cnt = 0;
+
+static int core2_get_pmc_count(void)
+{
+    u32 eax, ebx, ecx, edx;
+
+    if ( arch_pmc_cnt )
+        return arch_pmc_cnt;
+
+    cpuid(0xa, &eax, &ebx, &ecx, &edx);
+    return arch_pmc_cnt = (eax & 0xff00) >> 8;
+}
+
+static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
+{
+    int i;
+
+    for ( i=0; i < core2_counters.num; i++ )
+        if ( core2_counters.msr[i] == msr_index )
+        {
+            *type = MSR_TYPE_COUNTER;
+            *index = i;
+            return 1;
+        }
+    for ( i=0; i < core2_ctrls.num; i++ )
+        if ( core2_ctrls.msr[i] == msr_index )
+        {
+            *type = MSR_TYPE_CTRL;
+            *index = i;
+            return 1;
+        }
+
+    if ( msr_index == MSR_CORE_PERF_GLOBAL_CTRL ||
+         msr_index == MSR_CORE_PERF_GLOBAL_STATUS ||
+         msr_index == MSR_CORE_PERF_GLOBAL_OVF_CTRL )
+    {
+        *type = MSR_TYPE_GLOBAL;
+        return 1;
+    }
+
+    if ( msr_index >= MSR_IA32_PERFCTR0 &&
+         msr_index < MSR_IA32_PERFCTR0 + core2_get_pmc_count() )
+    {
+        *type = MSR_TYPE_ARCH_COUNTER;
+        *index = msr_index - MSR_IA32_PERFCTR0;
+        return 1;
+    }
+    if ( msr_index >= MSR_P6_EVNTSEL0 &&
+         msr_index < MSR_P6_EVNTSEL0 + core2_get_pmc_count() )
+    {
+        *type = MSR_TYPE_ARCH_CTRL;
+        *index = msr_index - MSR_P6_EVNTSEL0;
+        return 1;
+    }
+    return 0;
+}
+
+static void core2_vpmu_set_msr_bitmap(char *msr_bitmap)
+{
+    int i;
+
+    /* Allow Read/Write PMU Counters MSR Directly. */
+    for ( i=0; i < core2_counters.num; i++ )
+    {
+        clear_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap);
+        clear_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap + 0x800);
+    }
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+    {
+        clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap);
+        clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap + 0x800);
+    }
+    /* Allow Read PMU Non-global Controls Directly. */
+    for ( i=0; i < core2_ctrls.num; i++ )
+        clear_bit(msraddr_to_bitpos(core2_ctrls.msr[i]), msr_bitmap);
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+        clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0+i), msr_bitmap);
+}
+
+static void core2_vpmu_unset_msr_bitmap(char *msr_bitmap)
+{
+    int i;
+
+    /* Undo all the changes to msr bitmap. */
+    for ( i=0; i < core2_counters.num; i++ )
+    {
+        set_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap);
+        set_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap + 0x800);
+    }
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+    {
+        set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap);
+        set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap + 0x800);
+    }
+    for ( i=0; i < core2_ctrls.num; i++ )
+        set_bit(msraddr_to_bitpos(core2_ctrls.msr[i]), msr_bitmap);
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+        set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0+i), msr_bitmap);
+}
+
+static inline void __core2_vpmu_save(struct vcpu *v)
+{
+    int i;
+    struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    for ( i=0; i < core2_counters.num; i++ )
+        rdmsrl(core2_counters.msr[i], core2_vpmu_cxt->counters[i]);
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+        rdmsrl(MSR_IA32_PERFCTR0+i, core2_vpmu_cxt->arch_msr_pair[i].counter);
+    core2_vpmu_cxt->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
+    apic_write(APIC_LVTPC, LVTPC_HVM_PMU | APIC_LVT_MASKED);
+}
+
+static void core2_vpmu_save(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( !((vpmu->flags & VPMU_CONTEXT_ALLOCATED) &&
+           (vpmu->flags & VPMU_CONTEXT_LOADED)) )
+        return;
+
+    __core2_vpmu_save(v);
+
+    /* Unset PMU MSR bitmap to trap lazy load. */
+    if ( !(vpmu->flags & VPMU_RUNNING) && cpu_has_vmx_msr_bitmap )
+        core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+
+    vpmu->flags &= ~VPMU_CONTEXT_LOADED;
+    return;
+}
+
+static inline void __core2_vpmu_load(struct vcpu *v)
+{
+    int i;
+    struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    for ( i=0; i < core2_counters.num; i++ )
+        wrmsrl(core2_counters.msr[i], core2_vpmu_cxt->counters[i]);
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+        wrmsrl(MSR_IA32_PERFCTR0+i, core2_vpmu_cxt->arch_msr_pair[i].counter);
+
+    for ( i=0; i < core2_ctrls.num; i++ )
+        wrmsrl(core2_ctrls.msr[i], core2_vpmu_cxt->ctrls[i]);
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+        wrmsrl(MSR_P6_EVNTSEL0+i, core2_vpmu_cxt->arch_msr_pair[i].control);
+
+    apic_write_around(APIC_LVTPC, core2_vpmu_cxt->hw_lapic_lvtpc);
+}
+
+static void core2_vpmu_load(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    /* Only when PMU is counting, we load PMU context immediately. */
+    if ( !((vpmu->flags & VPMU_CONTEXT_ALLOCATED) &&
+           (vpmu->flags & VPMU_RUNNING)) )
+        return;
+    __core2_vpmu_load(v);
+    vpmu->flags |= VPMU_CONTEXT_LOADED;
+}
+
+static int core2_vpmu_alloc_resource(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core2_vpmu_context *core2_vpmu_cxt;
+    struct core2_pmu_enable *pmu_enable;
+
+    if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
+        return 0;
+
+    wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+    if ( vmx_add_host_load_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
+        return 0;
+
+    if ( vmx_add_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
+        return 0;
+    vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, -1ULL);
+
+    pmu_enable = xmalloc_bytes(sizeof(struct core2_pmu_enable) +
+                 (core2_get_pmc_count()-1)*sizeof(char));
+    if ( !pmu_enable )
+        goto out1;
+    memset(pmu_enable, 0, sizeof(struct core2_pmu_enable) +
+                 (core2_get_pmc_count()-1)*sizeof(char));
+
+    core2_vpmu_cxt = xmalloc_bytes(sizeof(struct core2_vpmu_context) +
+                    (core2_get_pmc_count()-1)*sizeof(struct arch_msr_pair));
+    if ( !core2_vpmu_cxt )
+        goto out2;
+    memset(core2_vpmu_cxt, 0, sizeof(struct core2_vpmu_context) +
+                    (core2_get_pmc_count()-1)*sizeof(struct arch_msr_pair));
+    core2_vpmu_cxt->pmu_enable = pmu_enable;
+    vpmu->context = (void *)core2_vpmu_cxt;
+
+    return 1;
+ out2:
+    xfree(pmu_enable);
+ out1:
+    dprintk(XENLOG_WARNING, "Insufficient memory for PMU, PMU feature is \
+            unavailable on domain %d vcpu %d.\n",
+            v->vcpu_id, v->domain->domain_id);
+    return 0;
+}
+
+static void core2_vpmu_save_msr_context(struct vcpu *v, int type,
+                                       int index, u64 msr_data)
+{
+    struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    switch ( type )
+    {
+    case MSR_TYPE_CTRL:
+        core2_vpmu_cxt->ctrls[index] = msr_data;
+        break;
+    case MSR_TYPE_ARCH_CTRL:
+        core2_vpmu_cxt->arch_msr_pair[index].control = msr_data;
+        break;
+    }
+}
+
+static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( !is_core2_vpmu_msr(msr_index, type, index) )
+        return 0;
+
+    if ( unlikely(!(vpmu->flags & VPMU_CONTEXT_ALLOCATED)) &&
+         !core2_vpmu_alloc_resource(current) )
+        return 0;
+    vpmu->flags |= VPMU_CONTEXT_ALLOCATED;
+
+    /* Do the lazy load staff. */
+    if ( !(vpmu->flags & VPMU_CONTEXT_LOADED) )
+    {
+        __core2_vpmu_load(current);
+        vpmu->flags |= VPMU_CONTEXT_LOADED;
+        if ( cpu_has_vmx_msr_bitmap )
+            core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap);
+    }
+    return 1;
+}
+
+static int core2_vpmu_do_wrmsr(struct cpu_user_regs *regs)
+{
+    u32 ecx = regs->ecx;
+    u64 msr_content;
+    int type = -1, index = -1;
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core2_vpmu_context *core2_vpmu_cxt = NULL;
+
+    if ( !core2_vpmu_msr_common_check(ecx, &type, &index) )
+        return 0;
+
+    msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
+    core2_vpmu_cxt = vpmu->context;
+    switch ( ecx )
+    {
+    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+        core2_vpmu_cxt->global_ovf_status &= ~msr_content;
+        return 1;
+    case MSR_CORE_PERF_GLOBAL_STATUS:
+        dprintk(XENLOG_INFO, "Can not write readonly MSR: \
+                            MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
+        vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
+        return 1;
+    case MSR_IA32_PEBS_ENABLE:
+        if ( msr_content & 1 )
+            dprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, \
+                    which is not supported.\n");
+        return 1;
+    case MSR_IA32_DS_AREA:
+        dprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
+        return 1;
+    case MSR_CORE_PERF_GLOBAL_CTRL:
+    {
+        int i;
+        u64 non_global_ctrl;
+        u32 global_ctrl = msr_content;
+
+        for ( i=0; i<core2_get_pmc_count(); i++ )
+        {
+            rdmsrl(MSR_P6_EVNTSEL0+i, non_global_ctrl);
+            core2_vpmu_cxt->pmu_enable->arch_pmc_enable[i] =
+                    (global_ctrl & 1) &
+                    ((non_global_ctrl + (1<<22))>>22);
+            global_ctrl >>= 1;
+        }
+
+        rdmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, non_global_ctrl);
+        global_ctrl = msr_content >> 32;
+        for ( i=0; i < 3; i++ )
+        {
+            core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] =
+                (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 1: 0);
+            non_global_ctrl >>= 4;
+            global_ctrl >>= 1;
+        }
+        break;
+    }
+    case MSR_CORE_PERF_FIXED_CTR_CTRL:
+    {
+        int i;
+        u32 fix_ctrl = msr_content;
+        u64 global_ctrl;
+        vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
+        global_ctrl >>= 32;
+        for ( i=0; i < 3; i++ )
+        {
+            core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] =
+                (global_ctrl & 1) & ((fix_ctrl & 0x3)? 1: 0);
+            fix_ctrl >>= 4;
+            global_ctrl >>=  1;
+        }
+        break;
+    }
+    default:
+    {
+        int tmp = ecx - MSR_P6_EVNTSEL0;
+        u64 global_ctrl;
+        vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
+        if ( tmp >= 0 && tmp < core2_get_pmc_count() )
+            core2_vpmu_cxt->pmu_enable->arch_pmc_enable[tmp] =
+                ((global_ctrl >> tmp ) & 1) &
+                ((msr_content & (1<<22)) >> 22);
+    }
+    }
+
+    if ( core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[0] == 1 ||
+         core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[1] == 1 ||
+         core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[2] == 1 )
+        vpmu->flags |= VPMU_RUNNING;
+    else
+    {
+        int i;
+        for ( i=0; i < core2_get_pmc_count(); i++ )
+        {
+            if ( core2_vpmu_cxt->pmu_enable->arch_pmc_enable[i] == 1 )
+            {
+                vpmu->flags |= VPMU_RUNNING;
+                break;
+            }
+        }
+        if ( i == core2_get_pmc_count() )
+            vpmu->flags &= ~VPMU_RUNNING;
+    }
+    /* Setup LVTPC in local apic */
+    if ( vpmu->flags & VPMU_RUNNING &&
+         is_vlapic_lvtpc_enabled(vcpu_vlapic(v)) )
+        apic_write_around(APIC_LVTPC, LVTPC_HVM_PMU);
+    else
+        apic_write_around(APIC_LVTPC, LVTPC_HVM_PMU | APIC_LVT_MASKED);
+
+    core2_vpmu_save_msr_context(v, type, index, msr_content);
+    if ( type != MSR_TYPE_GLOBAL )
+        wrmsrl(ecx, msr_content);
+    else
+        vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+
+    return 1;
+}
+
+static int core2_vpmu_do_rdmsr(struct cpu_user_regs *regs)
+{
+    u64 msr_content = 0;
+    int type = -1, index = -1;
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core2_vpmu_context *core2_vpmu_cxt = NULL;
+
+    if ( !core2_vpmu_msr_common_check(regs->ecx, &type, &index) )
+        return 0;
+
+    core2_vpmu_cxt = vpmu->context;
+    switch ( regs->ecx )
+    {
+    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+        break;
+    case MSR_CORE_PERF_GLOBAL_STATUS:
+        msr_content = core2_vpmu_cxt->global_ovf_status;
+        break;
+    case MSR_CORE_PERF_GLOBAL_CTRL:
+        vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &msr_content);
+        break;
+    default:
+        rdmsrl(regs->ecx, msr_content);
+    }
+
+    regs->eax = msr_content & 0xFFFFFFFF;
+    regs->edx = msr_content >> 32;
+    return 1;
+}
+
+static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    u64 msr_content;
+    u32 vlapic_lvtpc;
+    unsigned char int_vec;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core2_vpmu_context *core2_vpmu_cxt = vpmu->context;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+
+    rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
+    if ( !msr_content )
+        return 0;
+    core2_vpmu_cxt->global_ovf_status |= msr_content;
+    wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, 0xC000000700000003);
+
+    apic_write_around(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+    if ( !is_vlapic_lvtpc_enabled(vlapic) )
+        return 1;
+
+    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+    int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
+    vlapic_set_reg(vlapic, APIC_LVTPC, vlapic_lvtpc | APIC_LVT_MASKED);
+    if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
+        vlapic_set_irq(vcpu_vlapic(v), int_vec, 0);
+    else
+        test_and_set_bool(v->nmi_pending);
+    return 1;
+}
+
+static void core2_vpmu_initialise(struct vcpu *v)
+{
+}
+
+static void core2_vpmu_destroy(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core2_vpmu_context *core2_vpmu_cxt = vpmu->context;
+
+    if ( !vpmu->flags & VPMU_CONTEXT_ALLOCATED )
+        return;
+    xfree(core2_vpmu_cxt->pmu_enable);
+    xfree(vpmu->context);
+    if ( cpu_has_vmx_msr_bitmap )
+        core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+    release_pmu_ownship(PMU_OWNER_HVM);
+}
+
+struct arch_vpmu_ops core2_vpmu_ops = {
+    .do_wrmsr = core2_vpmu_do_wrmsr,
+    .do_rdmsr = core2_vpmu_do_rdmsr,
+    .do_interrupt = core2_vpmu_do_interrupt,
+    .arch_vpmu_initialise = core2_vpmu_initialise,
+    .arch_vpmu_destroy = core2_vpmu_destroy,
+    .arch_vpmu_save = core2_vpmu_save,
+    .arch_vpmu_load = core2_vpmu_load
+};
+
diff -r 8f0cbfc478d6 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/i8259.c	Fri Dec 14 07:21:43 2007 +0800
@@ -72,6 +72,7 @@ BUILD_SMP_INTERRUPT(apic_timer_interrupt
 BUILD_SMP_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+BUILD_SMP_INTERRUPT(pmu_apic_interrupt,PMU_APIC_VECTOR)
 BUILD_SMP_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 
 #define IRQ(x,y) \
diff -r 8f0cbfc478d6 xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/oprofile/nmi_int.c	Fri Dec 14 07:50:28 2007 +0800
@@ -291,18 +291,27 @@ static int __init p4_init(char ** cpu_ty
 }
 
 
+extern int ppro_has_global_ctrl;
 static int __init ppro_init(char ** cpu_type)
 {
 	__u8 cpu_model = current_cpu_data.x86_model;
 
-	if (cpu_model > 15) {
-		printk("xenoprof: Initialization failed. "
-		       "Intel processor model %d for P6 class family is not "
-		       "supported\n", cpu_model);
-		return 0;
-	}
-	else if (cpu_model == 15)
-		*cpu_type = "i386/core_2";
+    if (cpu_model > 15 && cpu_model != 23) {
+        printk("xenoprof: Initialization failed. "
+               "Intel processor model %d for P6 class family is not "
+               "supported\n", cpu_model);
+        return 0;
+    }
+    else if (cpu_model == 23)
+    {
+        ppro_has_global_ctrl = 1;
+        *cpu_type = "i386/core_2";
+    }
+    else if (cpu_model == 15)
+    {
+        ppro_has_global_ctrl = 1;
+        *cpu_type = "i386/core_2";
+    }
 	else if (cpu_model == 14)
 		*cpu_type = "i386/core";
 	else if (cpu_model == 9)
diff -r 8f0cbfc478d6 xen/arch/x86/oprofile/op_model_ppro.c
--- a/xen/arch/x86/oprofile/op_model_ppro.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/arch/x86/oprofile/op_model_ppro.c	Fri Dec 14 07:21:43 2007 +0800
@@ -41,6 +41,7 @@
 #define CTRL_SET_EVENT(val, e) (val |= e)
 
 static unsigned long reset_value[NUM_COUNTERS];
+int ppro_has_global_ctrl = 0;
  
 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 {
@@ -126,6 +127,11 @@ static void ppro_start(struct op_msrs co
 	CTRL_READ(low, high, msrs, 0);
 	CTRL_SET_ACTIVE(low);
 	CTRL_WRITE(low, high, msrs, 0);
+    /* Global Control MSR is enabled by default when system power on.
+     * However, this may not hold true when xenoprof starts to run.
+     */
+    if ( ppro_has_global_ctrl )
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 1);
 }
 
 
@@ -135,6 +141,8 @@ static void ppro_stop(struct op_msrs con
 	CTRL_READ(low, high, msrs, 0);
 	CTRL_SET_INACTIVE(low);
 	CTRL_WRITE(low, high, msrs, 0);
+    if ( ppro_has_global_ctrl )
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 }
 
 
diff -r 8f0cbfc478d6 xen/common/xenoprof.c
--- a/xen/common/xenoprof.c	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/common/xenoprof.c	Fri Dec 14 07:21:43 2007 +0800
@@ -22,6 +22,10 @@
 /* Lock protecting the following global state */
 static DEFINE_SPINLOCK(xenoprof_lock);
 
+static DEFINE_SPINLOCK(pmu_owner_lock);
+int pmu_owner = 0;
+int pmu_hvm_refcount = 0;
+
 static struct domain *active_domains[MAX_OPROF_DOMAINS];
 static int active_ready[MAX_OPROF_DOMAINS];
 static unsigned int adomains;
@@ -42,6 +46,37 @@ static u64 passive_samples;
 static u64 passive_samples;
 static u64 idle_samples;
 static u64 others_samples;
+
+int acquire_pmu_ownership(int pmu_ownship)
+{
+    spin_lock(&pmu_owner_lock);
+    if ( pmu_owner == PMU_OWNER_NONE )
+    {
+        pmu_owner = pmu_ownship;
+        goto out;
+    }
+
+    if ( pmu_owner == pmu_ownship )
+        goto out;
+
+    spin_unlock(&pmu_owner_lock);
+    return 0;
+ out:
+    if ( pmu_owner == PMU_OWNER_HVM )
+        pmu_hvm_refcount++;
+    spin_unlock(&pmu_owner_lock);
+    return 1;
+}
+
+void release_pmu_ownship(int pmu_ownship)
+{
+    spin_lock(&pmu_owner_lock);
+    if ( pmu_ownship == PMU_OWNER_HVM )
+        pmu_hvm_refcount--;
+    if ( !pmu_hvm_refcount )
+        pmu_owner = PMU_OWNER_NONE;
+    spin_unlock(&pmu_owner_lock);
+}
 
 int is_active(struct domain *d)
 {
@@ -648,6 +683,11 @@ int do_xenoprof_op(int op, XEN_GUEST_HAN
         break;
 
     case XENOPROF_get_buffer:
+        if ( !acquire_pmu_ownership(PMU_OWNER_XENOPROF) )
+        {
+            ret = -EBUSY;
+            break;
+        }
         ret = xenoprof_op_get_buffer(arg);
         break;
 
@@ -769,6 +809,7 @@ int do_xenoprof_op(int op, XEN_GUEST_HAN
             break;
         x = current->domain->xenoprof;
         unshare_xenoprof_page_with_guest(x);
+        release_pmu_ownship(PMU_OWNER_XENOPROF);
         break;
     }
 
diff -r 8f0cbfc478d6 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/asm-x86/hvm/hvm.h	Fri Dec 14 07:21:43 2007 +0800
@@ -124,6 +124,7 @@ struct hvm_function_table {
     void (*init_hypercall_page)(struct domain *d, void *hypercall_page);
 
     int  (*event_pending)(struct vcpu *v);
+    int  (*do_pmu_interrupt)(struct cpu_user_regs *regs);
 
     int  (*cpu_up)(void);
     void (*cpu_down)(void);
@@ -246,6 +247,11 @@ static inline int hvm_event_pending(stru
 static inline int hvm_event_pending(struct vcpu *v)
 {
     return hvm_funcs.event_pending(v);
+}
+
+static inline int hvm_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return hvm_funcs.do_pmu_interrupt(regs);
 }
 
 /* These reserved bits in lower 32 remain 0 after any load of CR0 */
diff -r 8f0cbfc478d6 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/asm-x86/hvm/vlapic.h	Fri Dec 14 07:22:01 2007 +0800
@@ -71,6 +71,12 @@ static inline void vlapic_set_reg(
     *((uint32_t *)(&vlapic->regs->data[reg])) = val;
 }
 
+static inline int is_vlapic_lvtpc_enabled(struct vlapic *vlapic)
+{
+    return vlapic_enabled(vlapic) &&
+           !(vlapic_get_reg(vlapic, APIC_LVTPC) & APIC_LVT_MASKED);
+}
+
 int vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig);
 
 int vlapic_find_highest_irr(struct vlapic *vlapic);
diff -r 8f0cbfc478d6 xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Fri Dec 14 07:21:43 2007 +0800
@@ -22,6 +22,7 @@
 #include <asm/config.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/vmx/cpu.h>
+#include <asm/hvm/vmx/vpmu.h>
 
 #ifdef VMXASSIST
 #include <public/hvm/vmx_assist.h>
@@ -75,6 +76,9 @@ struct arch_vmx_struct {
 
     /* Cache of cpu execution control. */
     u32                  exec_control;
+
+    /* PMU */
+    struct vpmu_struct   vpmu;
 
 #ifdef __x86_64__
     struct vmx_msr_state msr_state;
diff -r 8f0cbfc478d6 xen/include/asm-x86/hvm/vmx/vpmu.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vpmu.h	Fri Dec 14 07:21:43 2007 +0800
@@ -0,0 +1,83 @@
+/*
+ * vpmu.h: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#ifndef __ASM_X86_HVM_VPMU_H_
+#define __ASM_X86_HVM_VPMU_H_
+
+#define msraddr_to_bitpos(x) (((x)&0xffff) + ((x)>>31)*0x2000)
+#define vcpu_vpmu(vcpu)   (&(vcpu)->arch.hvm_vcpu.u.vmx.vpmu)
+#define vpmu_vcpu(vpmu)   (container_of((vpmu), struct vcpu, \
+                                          arch.hvm_vcpu.u.vmx.vpmu))
+#define vpmu_domain(vpmu) (vpmu_vcpu(vpmu)->domain)
+
+#define MSR_TYPE_COUNTER            0
+#define MSR_TYPE_CTRL               1
+#define MSR_TYPE_GLOBAL             2
+#define MSR_TYPE_ARCH_COUNTER       3
+#define MSR_TYPE_ARCH_CTRL          4
+
+#define LVTPC_HVM_PMU            0xf8
+
+struct pmumsr {
+    unsigned int num;
+    u32 *msr;
+};
+
+struct msr_load_store_entry {
+    u32 msr_index;
+    u32 msr_reserved;
+    u64 msr_data;
+};
+
+/* Arch specific operations shared by all vpmus */
+struct arch_vpmu_ops {
+    int (*do_wrmsr)(struct cpu_user_regs *regs);
+    int (*do_rdmsr)(struct cpu_user_regs *regs);
+    int (*do_interrupt)(struct cpu_user_regs *regs);
+    void (*arch_vpmu_initialise)(struct vcpu *v);
+    void (*arch_vpmu_destroy)(struct vcpu *v);
+    void (*arch_vpmu_save)(struct vcpu *v);
+    void (*arch_vpmu_load)(struct vcpu *v);
+};
+
+struct vpmu_struct {
+    u32 flags;
+    void *context;
+    struct arch_vpmu_ops *arch_vpmu_ops;
+};
+
+#define VPMU_CONTEXT_ALLOCATED              0x1
+#define VPMU_CONTEXT_LOADED                 0x2
+#define VPMU_RUNNING                        0x4
+
+int inline vpmu_do_wrmsr(struct cpu_user_regs *regs);
+int inline vpmu_do_rdmsr(struct cpu_user_regs *regs);
+int inline vpmu_do_interrupt(struct cpu_user_regs *regs);
+void inline vpmu_initialise(struct vcpu *v);
+void inline vpmu_destroy(struct vcpu *v);
+void inline vpmu_save(struct vcpu *v);
+void inline vpmu_load(struct vcpu *v);
+
+extern int acquire_pmu_ownership(int pmu_ownership);
+extern void release_pmu_ownership(int pmu_ownership);
+
+#endif /* __ASM_X86_HVM_VPMU_H_*/
+
diff -r 8f0cbfc478d6 xen/include/asm-x86/hvm/vmx/vpmu_core2.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vpmu_core2.h	Fri Dec 14 07:52:47 2007 +0800
@@ -0,0 +1,68 @@
+
+/*
+ * vpmu_core2.h: CORE 2 specific PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#ifndef __ASM_X86_HVM_VPMU_CORE_H_
+#define __ASM_X86_HVM_VPMU_CORE_H_
+
+/* Core 2 Non-architectual Performance Counter MSRs. */
+u32 core2_counters_msr[] =   {
+    MSR_CORE_PERF_FIXED_CTR0,
+    MSR_CORE_PERF_FIXED_CTR1,
+    MSR_CORE_PERF_FIXED_CTR2};
+
+/* Core 2 Non-architectual Performance Control MSRs. */
+u32 core2_ctrls_msr[] = {
+    MSR_CORE_PERF_FIXED_CTR_CTRL,
+    MSR_IA32_PEBS_ENABLE,
+    MSR_IA32_DS_AREA};
+
+struct pmumsr core2_counters = {
+    3,
+    core2_counters_msr
+};
+
+struct pmumsr core2_ctrls = {
+    3,
+    core2_ctrls_msr
+};
+
+struct arch_msr_pair {
+    u64 counter;
+    u64 control;
+};
+
+struct core2_pmu_enable {
+    char fixed_ctr_enable[3];
+    char arch_pmc_enable[1];
+};
+
+struct core2_vpmu_context {
+    struct core2_pmu_enable *pmu_enable;
+    u64 counters[3];
+    u64 ctrls[3];
+    u64 global_ovf_status;
+    u32 hw_lapic_lvtpc;
+    struct arch_msr_pair arch_msr_pair[1];
+};
+
+#endif /* __ASM_X86_HVM_VPMU_CORE_H_ */
+
diff -r 8f0cbfc478d6 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/asm-x86/irq.h	Fri Dec 14 07:21:43 2007 +0800
@@ -28,6 +28,7 @@ fastcall void call_function_interrupt(vo
 fastcall void call_function_interrupt(void);
 fastcall void apic_timer_interrupt(void);
 fastcall void error_interrupt(void);
+fastcall void pmu_apic_interrupt(void);
 fastcall void spurious_interrupt(void);
 fastcall void thermal_interrupt(void);
 
diff -r 8f0cbfc478d6 xen/include/asm-x86/mach-default/irq_vectors.h
--- a/xen/include/asm-x86/mach-default/irq_vectors.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/asm-x86/mach-default/irq_vectors.h	Fri Dec 14 08:12:04 2007 +0800
@@ -9,13 +9,14 @@
 #define CALL_FUNCTION_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR	0xfa
 #define LOCAL_TIMER_VECTOR	0xf9
+#define PMU_APIC_VECTOR 	0xf8
 
 /*
  * High-priority dynamically-allocated vectors. For interrupts that
  * must be higher priority than any guest-bound interrupt.
  */
 #define FIRST_HIPRIORITY_VECTOR	0xf0
-#define LAST_HIPRIORITY_VECTOR  0xf8
+#define LAST_HIPRIORITY_VECTOR  0xf7
 
 /* Legacy PIC uses vectors 0xe0-0xef. */
 #define FIRST_LEGACY_VECTOR	0xe0
diff -r 8f0cbfc478d6 xen/include/xen/xenoprof.h
--- a/xen/include/xen/xenoprof.h	Thu Dec 13 09:31:03 2007 +0000
+++ b/xen/include/xen/xenoprof.h	Fri Dec 14 07:21:43 2007 +0800
@@ -69,4 +69,10 @@ int xenoprof_add_trace(struct domain *d,
 int xenoprof_add_trace(struct domain *d, struct vcpu *v, 
                        unsigned long eip, int mode);
 
+#define PMU_OWNER_NONE          0
+#define PMU_OWNER_XENOPROF      1
+#define PMU_OWNER_HVM           2
+int acquire_pmu_ownship(int pmu_ownership);
+void release_pmu_ownship(int pmu_ownership);
+
 #endif  /* __XEN__XENOPROF_H__ */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: Re: [PATCH] Enable Core 2 Duo PerformanceCounters inHVM guest
  2007-12-14 15:43               ` Shan, Haitao
@ 2007-12-14 19:05                 ` Otavio Salvador
  2007-12-15  1:09                   ` Shan, Haitao
  0 siblings, 1 reply; 14+ messages in thread
From: Otavio Salvador @ 2007-12-14 19:05 UTC (permalink / raw)
  To: Shan, Haitao; +Cc: xen-devel, Jiang, Yunhong

"Shan, Haitao" <haitao.shan@intel.com> writes:

Hello Haitao,

I've done a look on the patch and found two places with wrong coding
style. Take a look bellow:

> diff -r 8f0cbfc478d6 xen/arch/x86/oprofile/op_model_ppro.c
> --- a/xen/arch/x86/oprofile/op_model_ppro.c	Thu Dec 13 09:31:03 2007 +0000
> +++ b/xen/arch/x86/oprofile/op_model_ppro.c	Fri Dec 14 07:21:43 2007 +0800
> @@ -126,6 +127,11 @@ static void ppro_start(struct op_msrs co
>  	CTRL_READ(low, high, msrs, 0);
>  	CTRL_SET_ACTIVE(low);
>  	CTRL_WRITE(low, high, msrs, 0);
> +    /* Global Control MSR is enabled by default when system power on.
> +     * However, this may not hold true when xenoprof starts to run.
> +     */
> +    if ( ppro_has_global_ctrl )
> +        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 1);
>  }
>  
>  
> @@ -135,6 +141,8 @@ static void ppro_stop(struct op_msrs con
>  	CTRL_READ(low, high, msrs, 0);
>  	CTRL_SET_INACTIVE(low);
>  	CTRL_WRITE(low, high, msrs, 0);
> +    if ( ppro_has_global_ctrl )
> +        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
>  }

Cheers,

-- 
Otavio Salvador                  O.S. Systems
E-mail: otavio@ossystems.com.br  http://www.ossystems.com.br
Mobile: +55 53 9981-7854         http://projetos.ossystems.com.br

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: Re: [PATCH] Enable Core 2 Duo PerformanceCounters inHVM guest
  2007-12-14 19:05                 ` Otavio Salvador
@ 2007-12-15  1:09                   ` Shan, Haitao
  2007-12-17  7:22                     ` Re: [PATCH] Enable Core 2 Duo PerformanceCountersinHVM guest Shan, Haitao
  0 siblings, 1 reply; 14+ messages in thread
From: Shan, Haitao @ 2007-12-15  1:09 UTC (permalink / raw)
  To: Otavio Salvador; +Cc: xen-devel, Jiang, Yunhong

Hi, Salvador,

Do you mean the strange indentation around CTRL_WRITE? That is because there are three hard TABs before CTRL_WRITE. I am not sure whether I should fix the original code, so I just leave them there untouched.

Haitao Shan

-----Original Message-----
From: Otavio Salvador [mailto:otavio@ossystems.com.br] 
Sent: 2007年12月15日 3:05
To: Shan, Haitao
Cc: Keir Fraser; xen-devel@lists.xensource.com; Jiang, Yunhong
Subject: Re: [Xen-devel] Re: [PATCH] Enable Core 2 Duo PerformanceCounters inHVM guest

"Shan, Haitao" <haitao.shan@intel.com> writes:

Hello Haitao,

I've done a look on the patch and found two places with wrong coding
style. Take a look bellow:

> diff -r 8f0cbfc478d6 xen/arch/x86/oprofile/op_model_ppro.c
> --- a/xen/arch/x86/oprofile/op_model_ppro.c	Thu Dec 13 09:31:03 2007 +0000
> +++ b/xen/arch/x86/oprofile/op_model_ppro.c	Fri Dec 14 07:21:43 2007 +0800
> @@ -126,6 +127,11 @@ static void ppro_start(struct op_msrs co
>  	CTRL_READ(low, high, msrs, 0);
>  	CTRL_SET_ACTIVE(low);
>  	CTRL_WRITE(low, high, msrs, 0);
> +    /* Global Control MSR is enabled by default when system power on.
> +     * However, this may not hold true when xenoprof starts to run.
> +     */
> +    if ( ppro_has_global_ctrl )
> +        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 1);
>  }
>  
>  
> @@ -135,6 +141,8 @@ static void ppro_stop(struct op_msrs con
>  	CTRL_READ(low, high, msrs, 0);
>  	CTRL_SET_INACTIVE(low);
>  	CTRL_WRITE(low, high, msrs, 0);
> +    if ( ppro_has_global_ctrl )
> +        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
>  }

Cheers,

-- 
Otavio Salvador                  O.S. Systems
E-mail: otavio@ossystems.com.br  http://www.ossystems.com.br
Mobile: +55 53 9981-7854         http://projetos.ossystems.com.br

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: Re: [PATCH] Enable Core 2 Duo PerformanceCountersinHVM guest
  2007-12-15  1:09                   ` Shan, Haitao
@ 2007-12-17  7:22                     ` Shan, Haitao
  2007-12-17  8:55                       ` Keir Fraser
  0 siblings, 1 reply; 14+ messages in thread
From: Shan, Haitao @ 2007-12-17  7:22 UTC (permalink / raw)
  To: Shan, Haitao, Otavio Salvador; +Cc: xen-devel, Jiang, Yunhong


[-- Attachment #1.1: Type: text/plain, Size: 2010 bytes --]

Hi, Keir,
 
I have baked an updated patch. In this patch, I changed some logic to shorten the lengthy part of core2_vpmu_do_wrmsr. And also, I did a rebase, since there is one new changeset recently which made changes to xenoprofile.
Can you have a look?
 <<pmu.patch>> 
Best Regards
Haitao Shan

Shan, Haitao wrote:
> Hi, Salvador,
> 
> Do you mean the strange indentation around CTRL_WRITE? That is
> because there are three hard TABs before CTRL_WRITE. I am not sure
> whether I should fix the original code, so I just leave them there
> untouched.   
> 
> Haitao Shan
> 
> -----Original Message-----
> From: Otavio Salvador [mailto:otavio@ossystems.com.br]
> Sent: 2007年12月15日 3:05
> To: Shan, Haitao
> Cc: Keir Fraser; xen-devel@lists.xensource.com; Jiang, Yunhong
> Subject: Re: [Xen-devel] Re: [PATCH] Enable Core 2 Duo
> PerformanceCounters inHVM guest 
> 
> "Shan, Haitao" <haitao.shan@intel.com> writes:
> 
> Hello Haitao,
> 
> I've done a look on the patch and found two places with wrong coding
> style. Take a look bellow:
> 
>> diff -r 8f0cbfc478d6 xen/arch/x86/oprofile/op_model_ppro.c
>> --- a/xen/arch/x86/oprofile/op_model_ppro.c	Thu Dec 13 09:31:03 2007
>> +0000 +++ b/xen/arch/x86/oprofile/op_model_ppro.c	Fri Dec 14
>> 07:21:43 2007 +0800 @@ -126,6 +127,11 @@ static void
>>  	ppro_start(struct op_msrs co CTRL_READ(low, high, msrs, 0);
>>  	CTRL_SET_ACTIVE(low);
>>  	CTRL_WRITE(low, high, msrs, 0);
>> +    /* Global Control MSR is enabled by default when system power
>> on. +     * However, this may not hold true when xenoprof starts to
>> run. +     */ +    if ( ppro_has_global_ctrl )
>> +        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 1);
>>  }
>> 
>> 
>> @@ -135,6 +141,8 @@ static void ppro_stop(struct op_msrs con
>>  	CTRL_READ(low, high, msrs, 0);
>>  	CTRL_SET_INACTIVE(low);
>>  	CTRL_WRITE(low, high, msrs, 0);
>> +    if ( ppro_has_global_ctrl )
>> +        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
>>  }
> 
> Cheers,

[-- Attachment #1.2: Type: text/html, Size: 7329 bytes --]

[-- Attachment #2: pmu.patch --]
[-- Type: application/octet-stream, Size: 38139 bytes --]

diff -r 966a6d3b7408 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/arch/x86/apic.c	Mon Dec 17 22:35:58 2007 +0800
@@ -93,6 +93,9 @@ void __init apic_intr_init(void)
     /* IPI vectors for APIC spurious and error interrupts */
     set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
     set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+    /* Performance Counters Interrupt */
+    set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt);
 
     /* thermal monitor LVT interrupt */
 #ifdef CONFIG_X86_MCE_P4THERMAL
@@ -1227,6 +1230,16 @@ fastcall void smp_error_interrupt(struct
 }
 
 /*
+ * This interrupt handles performance counters interrupt
+ */
+
+fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs)
+{
+    ack_APIC_irq();
+    hvm_do_pmu_interrupt(regs);
+}
+
+/*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
diff -r 966a6d3b7408 xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/arch/x86/hvm/svm/svm.c	Mon Dec 17 22:35:58 2007 +0800
@@ -860,6 +860,11 @@ static int svm_event_pending(struct vcpu
     return vmcb->eventinj.fields.v;
 }
 
+static int svm_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return 0;
+}
+
 static struct hvm_function_table svm_function_table = {
     .name                 = "SVM",
     .cpu_down             = svm_cpu_down,
@@ -882,7 +887,8 @@ static struct hvm_function_table svm_fun
     .set_tsc_offset       = svm_set_tsc_offset,
     .inject_exception     = svm_inject_exception,
     .init_hypercall_page  = svm_init_hypercall_page,
-    .event_pending        = svm_event_pending
+    .event_pending        = svm_event_pending,
+    .do_pmu_interrupt     = svm_do_pmu_interrupt
 };
 
 int start_svm(struct cpuinfo_x86 *c)
diff -r 966a6d3b7408 xen/arch/x86/hvm/vmx/Makefile
--- a/xen/arch/x86/hvm/vmx/Makefile	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/Makefile	Mon Dec 17 22:35:58 2007 +0800
@@ -9,3 +9,5 @@ endif
 endif
 obj-y += vmcs.o
 obj-y += vmx.o
+obj-y += vpmu.o
+obj-y += vpmu_core2.o
diff -r 966a6d3b7408 xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Mon Dec 17 22:35:58 2007 +0800
@@ -90,6 +90,8 @@ static int vmx_vcpu_initialise(struct vc
         return rc;
     }
 
+    vpmu_initialise(v);
+
     vmx_install_vlapic_mapping(v);
 
 #ifndef VMXASSIST
@@ -104,6 +106,7 @@ static void vmx_vcpu_destroy(struct vcpu
 static void vmx_vcpu_destroy(struct vcpu *v)
 {
     vmx_destroy_vmcs(v);
+    vpmu_destroy(v);
 }
 
 #ifdef __x86_64__
@@ -742,6 +745,7 @@ static void vmx_ctxt_switch_from(struct 
     vmx_save_guest_msrs(v);
     vmx_restore_host_msrs();
     vmx_save_dr(v);
+    vpmu_save(v);
 }
 
 static void vmx_ctxt_switch_to(struct vcpu *v)
@@ -752,6 +756,7 @@ static void vmx_ctxt_switch_to(struct vc
 
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
+    vpmu_load(v);
 }
 
 static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg)
@@ -1117,6 +1122,11 @@ static int vmx_event_pending(struct vcpu
 {
     ASSERT(v == current);
     return (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK);
+}
+
+static int vmx_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return vpmu_do_interrupt(regs);
 }
 
 static struct hvm_function_table vmx_function_table = {
@@ -1141,6 +1151,7 @@ static struct hvm_function_table vmx_fun
     .inject_exception     = vmx_inject_exception,
     .init_hypercall_page  = vmx_init_hypercall_page,
     .event_pending        = vmx_event_pending,
+    .do_pmu_interrupt     = vmx_do_pmu_interrupt,
     .cpu_up               = vmx_cpu_up,
     .cpu_down             = vmx_cpu_down,
 };
@@ -1299,7 +1310,6 @@ void vmx_cpuid_intercept(
 
     case 0x00000006:
     case 0x00000009:
-    case 0x0000000A:
         *eax = *ebx = *ecx = *edx = 0;
         break;
 
@@ -2382,7 +2392,15 @@ static int vmx_do_msr_read(struct cpu_us
         /* No point in letting the guest see real MCEs */
         msr_content = 0;
         break;
+    case MSR_IA32_MISC_ENABLE:
+        rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
+        /* Debug Trace Store is not supported. */
+        msr_content |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
+                       MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
+        break;
     default:
+        if ( vpmu_do_rdmsr(regs) )
+            goto done;
         switch ( long_mode_do_msr_read(regs) )
         {
             case HNDL_unhandled:
@@ -2589,6 +2607,8 @@ static int vmx_do_msr_write(struct cpu_u
     case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
         goto gp_fault;
     default:
+        if ( vpmu_do_wrmsr(regs) )
+            return 1;
         switch ( long_mode_do_msr_write(regs) )
         {
             case HNDL_unhandled:
@@ -2638,6 +2658,7 @@ static void vmx_do_extint(struct cpu_use
     fastcall void smp_call_function_interrupt(void);
     fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
     fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
+    fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs);
 #ifdef CONFIG_X86_MCE_P4THERMAL
     fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
 #endif
@@ -2667,6 +2688,9 @@ static void vmx_do_extint(struct cpu_use
         break;
     case ERROR_APIC_VECTOR:
         smp_error_interrupt(regs);
+        break;
+    case PMU_APIC_VECTOR:
+        smp_pmu_apic_interrupt(regs);
         break;
 #ifdef CONFIG_X86_MCE_P4THERMAL
     case THERMAL_APIC_VECTOR:
diff -r 966a6d3b7408 xen/arch/x86/hvm/vmx/vpmu.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vpmu.c	Mon Dec 17 22:35:58 2007 +0800
@@ -0,0 +1,119 @@
+/*
+ * vpmu.c: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <public/sched.h>
+#include <public/hvm/save.h>
+#include <asm/hvm/vmx/vpmu.h>
+
+int inline vpmu_do_wrmsr(struct cpu_user_regs *regs)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu_ops )
+        return vpmu->arch_vpmu_ops->do_wrmsr(regs);
+    return 0;
+}
+
+int inline vpmu_do_rdmsr(struct cpu_user_regs *regs)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu_ops )
+        return vpmu->arch_vpmu_ops->do_rdmsr(regs);
+    return 0;
+}
+
+int inline vpmu_do_interrupt(struct cpu_user_regs *regs)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( vpmu->arch_vpmu_ops )
+        return vpmu->arch_vpmu_ops->do_interrupt(regs);
+    return 0;
+}
+
+void vpmu_save(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu_ops )
+        vpmu->arch_vpmu_ops->arch_vpmu_save(v);
+}
+
+void vpmu_load(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu_ops )
+        vpmu->arch_vpmu_ops->arch_vpmu_load(v);
+}
+
+extern struct arch_vpmu_ops core2_vpmu_ops;
+void inline vpmu_initialise(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    /* If it is not a fresh initialization, release all resources
+     * before initialise again.
+     */
+    if ( vpmu->flags & VPMU_CONTEXT_ALLOCATED )
+        vpmu_destroy(v);
+
+    if ( current_cpu_data.x86 == 6 )
+    {
+        switch ( current_cpu_data.x86_model )
+        {
+        case 15:
+        case 23:
+            vpmu->arch_vpmu_ops = &core2_vpmu_ops;
+            dprintk(XENLOG_INFO,
+                   "Core 2 duo CPU detected for guest PMU usage.\n");
+            break;
+        }
+    }
+
+    if ( !vpmu->arch_vpmu_ops )
+    {
+        dprintk(XENLOG_WARNING, "Unsupport CPU model for guest PMU usage.\n");
+        return;
+    }
+
+    vpmu->flags = 0;
+    vpmu->context = NULL;
+    vpmu->arch_vpmu_ops->arch_vpmu_initialise(v);
+}
+
+void inline vpmu_destroy(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( vpmu->arch_vpmu_ops )
+        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
+}
+
diff -r 966a6d3b7408 xen/arch/x86/hvm/vmx/vpmu_core2.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c	Mon Dec 17 22:58:28 2007 +0800
@@ -0,0 +1,469 @@
+/*
+ * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <asm/system.h>
+#include <asm/regs.h>
+#include <asm/types.h>
+#include <asm/msr.h>
+#include <asm/msr-index.h>
+#include <asm/hvm/support.h>
+#include <asm/hvm/vlapic.h>
+#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/vmx/vmcs.h>
+#include <public/sched.h>
+#include <public/hvm/save.h>
+#include <asm/hvm/vmx/vpmu.h>
+#include <asm/hvm/vmx/vpmu_core2.h>
+
+static int arch_pmc_cnt = 0;
+
+static int core2_get_pmc_count(void)
+{
+    u32 eax, ebx, ecx, edx;
+
+    if ( arch_pmc_cnt )
+        return arch_pmc_cnt;
+
+    cpuid(0xa, &eax, &ebx, &ecx, &edx);
+    return arch_pmc_cnt = (eax & 0xff00) >> 8;
+}
+
+static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
+{
+    int i;
+
+    for ( i=0; i < core2_counters.num; i++ )
+        if ( core2_counters.msr[i] == msr_index )
+        {
+            *type = MSR_TYPE_COUNTER;
+            *index = i;
+            return 1;
+        }
+    for ( i=0; i < core2_ctrls.num; i++ )
+        if ( core2_ctrls.msr[i] == msr_index )
+        {
+            *type = MSR_TYPE_CTRL;
+            *index = i;
+            return 1;
+        }
+
+    if ( msr_index == MSR_CORE_PERF_GLOBAL_CTRL ||
+         msr_index == MSR_CORE_PERF_GLOBAL_STATUS ||
+         msr_index == MSR_CORE_PERF_GLOBAL_OVF_CTRL )
+    {
+        *type = MSR_TYPE_GLOBAL;
+        return 1;
+    }
+
+    if ( msr_index >= MSR_IA32_PERFCTR0 &&
+         msr_index < MSR_IA32_PERFCTR0 + core2_get_pmc_count() )
+    {
+        *type = MSR_TYPE_ARCH_COUNTER;
+        *index = msr_index - MSR_IA32_PERFCTR0;
+        return 1;
+    }
+    if ( msr_index >= MSR_P6_EVNTSEL0 &&
+         msr_index < MSR_P6_EVNTSEL0 + core2_get_pmc_count() )
+    {
+        *type = MSR_TYPE_ARCH_CTRL;
+        *index = msr_index - MSR_P6_EVNTSEL0;
+        return 1;
+    }
+    return 0;
+}
+
+static void core2_vpmu_set_msr_bitmap(char *msr_bitmap)
+{
+    int i;
+
+    /* Allow Read/Write PMU Counters MSR Directly. */
+    for ( i=0; i < core2_counters.num; i++ )
+    {
+        clear_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap);
+        clear_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap + 0x800);
+    }
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+    {
+        clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap);
+        clear_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap + 0x800);
+    }
+    /* Allow Read PMU Non-global Controls Directly. */
+    for ( i=0; i < core2_ctrls.num; i++ )
+        clear_bit(msraddr_to_bitpos(core2_ctrls.msr[i]), msr_bitmap);
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+        clear_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0+i), msr_bitmap);
+}
+
+static void core2_vpmu_unset_msr_bitmap(char *msr_bitmap)
+{
+    int i;
+
+    /* Undo all the changes to msr bitmap. */
+    for ( i=0; i < core2_counters.num; i++ )
+    {
+        set_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap);
+        set_bit(msraddr_to_bitpos(core2_counters.msr[i]), msr_bitmap + 0x800);
+    }
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+    {
+        set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap);
+        set_bit(msraddr_to_bitpos(MSR_IA32_PERFCTR0+i), msr_bitmap + 0x800);
+    }
+    for ( i=0; i < core2_ctrls.num; i++ )
+        set_bit(msraddr_to_bitpos(core2_ctrls.msr[i]), msr_bitmap);
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+        set_bit(msraddr_to_bitpos(MSR_P6_EVNTSEL0+i), msr_bitmap);
+}
+
+static inline void __core2_vpmu_save(struct vcpu *v)
+{
+    int i;
+    struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    for ( i=0; i < core2_counters.num; i++ )
+        rdmsrl(core2_counters.msr[i], core2_vpmu_cxt->counters[i]);
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+        rdmsrl(MSR_IA32_PERFCTR0+i, core2_vpmu_cxt->arch_msr_pair[i].counter);
+    core2_vpmu_cxt->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
+    apic_write(APIC_LVTPC, LVTPC_HVM_PMU | APIC_LVT_MASKED);
+}
+
+static void core2_vpmu_save(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    if ( !((vpmu->flags & VPMU_CONTEXT_ALLOCATED) &&
+           (vpmu->flags & VPMU_CONTEXT_LOADED)) )
+        return;
+
+    __core2_vpmu_save(v);
+
+    /* Unset PMU MSR bitmap to trap lazy load. */
+    if ( !(vpmu->flags & VPMU_RUNNING) && cpu_has_vmx_msr_bitmap )
+        core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+
+    vpmu->flags &= ~VPMU_CONTEXT_LOADED;
+    return;
+}
+
+static inline void __core2_vpmu_load(struct vcpu *v)
+{
+    int i;
+    struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    for ( i=0; i < core2_counters.num; i++ )
+        wrmsrl(core2_counters.msr[i], core2_vpmu_cxt->counters[i]);
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+        wrmsrl(MSR_IA32_PERFCTR0+i, core2_vpmu_cxt->arch_msr_pair[i].counter);
+
+    for ( i=0; i < core2_ctrls.num; i++ )
+        wrmsrl(core2_ctrls.msr[i], core2_vpmu_cxt->ctrls[i]);
+    for ( i=0; i < core2_get_pmc_count(); i++ )
+        wrmsrl(MSR_P6_EVNTSEL0+i, core2_vpmu_cxt->arch_msr_pair[i].control);
+
+    apic_write_around(APIC_LVTPC, core2_vpmu_cxt->hw_lapic_lvtpc);
+}
+
+static void core2_vpmu_load(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+    /* Only when PMU is counting, we load PMU context immediately. */
+    if ( !((vpmu->flags & VPMU_CONTEXT_ALLOCATED) &&
+           (vpmu->flags & VPMU_RUNNING)) )
+        return;
+    __core2_vpmu_load(v);
+    vpmu->flags |= VPMU_CONTEXT_LOADED;
+}
+
+static int core2_vpmu_alloc_resource(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core2_vpmu_context *core2_vpmu_cxt;
+    struct core2_pmu_enable *pmu_enable;
+
+    if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
+        return 0;
+
+    wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+    if ( vmx_add_host_load_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
+        return 0;
+
+    if ( vmx_add_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL) )
+        return 0;
+    vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, -1ULL);
+
+    pmu_enable = xmalloc_bytes(sizeof(struct core2_pmu_enable) +
+                 (core2_get_pmc_count()-1)*sizeof(char));
+    if ( !pmu_enable )
+        goto out1;
+    memset(pmu_enable, 0, sizeof(struct core2_pmu_enable) +
+                 (core2_get_pmc_count()-1)*sizeof(char));
+
+    core2_vpmu_cxt = xmalloc_bytes(sizeof(struct core2_vpmu_context) +
+                    (core2_get_pmc_count()-1)*sizeof(struct arch_msr_pair));
+    if ( !core2_vpmu_cxt )
+        goto out2;
+    memset(core2_vpmu_cxt, 0, sizeof(struct core2_vpmu_context) +
+                    (core2_get_pmc_count()-1)*sizeof(struct arch_msr_pair));
+    core2_vpmu_cxt->pmu_enable = pmu_enable;
+    vpmu->context = (void *)core2_vpmu_cxt;
+
+    return 1;
+ out2:
+    xfree(pmu_enable);
+ out1:
+    dprintk(XENLOG_WARNING, "Insufficient memory for PMU, PMU feature is \
+            unavailable on domain %d vcpu %d.\n",
+            v->vcpu_id, v->domain->domain_id);
+    return 0;
+}
+
+static void core2_vpmu_save_msr_context(struct vcpu *v, int type,
+                                       int index, u64 msr_data)
+{
+    struct core2_vpmu_context *core2_vpmu_cxt = vcpu_vpmu(v)->context;
+
+    switch ( type )
+    {
+    case MSR_TYPE_CTRL:
+        core2_vpmu_cxt->ctrls[index] = msr_data;
+        break;
+    case MSR_TYPE_ARCH_CTRL:
+        core2_vpmu_cxt->arch_msr_pair[index].control = msr_data;
+        break;
+    }
+}
+
+static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(current);
+
+    if ( !is_core2_vpmu_msr(msr_index, type, index) )
+        return 0;
+
+    if ( unlikely(!(vpmu->flags & VPMU_CONTEXT_ALLOCATED)) &&
+         !core2_vpmu_alloc_resource(current) )
+        return 0;
+    vpmu->flags |= VPMU_CONTEXT_ALLOCATED;
+
+    /* Do the lazy load staff. */
+    if ( !(vpmu->flags & VPMU_CONTEXT_LOADED) )
+    {
+        __core2_vpmu_load(current);
+        vpmu->flags |= VPMU_CONTEXT_LOADED;
+        if ( cpu_has_vmx_msr_bitmap )
+            core2_vpmu_set_msr_bitmap(current->arch.hvm_vmx.msr_bitmap);
+    }
+    return 1;
+}
+
+static int core2_vpmu_do_wrmsr(struct cpu_user_regs *regs)
+{
+    u32 ecx = regs->ecx;
+    u64 msr_content, global_ctrl, non_global_ctrl;
+    char pmu_enable = 0;
+    int i, tmp;
+    int type = -1, index = -1;
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core2_vpmu_context *core2_vpmu_cxt = NULL;
+
+    if ( !core2_vpmu_msr_common_check(ecx, &type, &index) )
+        return 0;
+
+    msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
+    core2_vpmu_cxt = vpmu->context;
+    switch ( ecx )
+    {
+    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+        core2_vpmu_cxt->global_ovf_status &= ~msr_content;
+        return 1;
+    case MSR_CORE_PERF_GLOBAL_STATUS:
+        dprintk(XENLOG_INFO, "Can not write readonly MSR: \
+                            MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
+        vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
+        return 1;
+    case MSR_IA32_PEBS_ENABLE:
+        if ( msr_content & 1 )
+            dprintk(XENLOG_WARNING, "Guest is trying to enable PEBS, \
+                    which is not supported.\n");
+        return 1;
+    case MSR_IA32_DS_AREA:
+        dprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
+        return 1;
+    case MSR_CORE_PERF_GLOBAL_CTRL:
+        global_ctrl = msr_content;
+        for ( i = 0; i < core2_get_pmc_count(); i++ )
+        {
+            rdmsrl(MSR_P6_EVNTSEL0+i, non_global_ctrl);
+            core2_vpmu_cxt->pmu_enable->arch_pmc_enable[i] =
+                    global_ctrl & (non_global_ctrl >> 22) & 1;
+            global_ctrl >>= 1;
+        }
+
+        rdmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, non_global_ctrl);
+        global_ctrl = msr_content >> 32;
+        for ( i = 0; i < 3; i++ )
+        {
+            core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] =
+                (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 1: 0);
+            non_global_ctrl >>= 4;
+            global_ctrl >>= 1;
+        }
+        break;
+    case MSR_CORE_PERF_FIXED_CTR_CTRL:
+        non_global_ctrl = msr_content;
+        vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
+        global_ctrl >>= 32;
+        for ( i = 0; i < 3; i++ )
+        {
+            core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i] =
+                (global_ctrl & 1) & ((non_global_ctrl & 0x3)? 1: 0);
+            non_global_ctrl >>= 4;
+            global_ctrl >>= 1;
+        }
+        break;
+    default:
+        tmp = ecx - MSR_P6_EVNTSEL0;
+        vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &global_ctrl);
+        if ( tmp >= 0 && tmp < core2_get_pmc_count() )
+            core2_vpmu_cxt->pmu_enable->arch_pmc_enable[tmp] =
+                (global_ctrl >> tmp) & (msr_content >> 22) & 1;
+    }
+
+    for ( i = 0; i < 3; i++ )
+        pmu_enable |= core2_vpmu_cxt->pmu_enable->fixed_ctr_enable[i];
+    for ( i = 0; i < core2_get_pmc_count(); i++ )
+        pmu_enable |= core2_vpmu_cxt->pmu_enable->arch_pmc_enable[i];
+    if ( pmu_enable )
+        vpmu->flags |= VPMU_RUNNING;
+    else
+        vpmu->flags &= ~VPMU_RUNNING;
+
+    /* Setup LVTPC in local apic */
+    if ( vpmu->flags & VPMU_RUNNING &&
+         is_vlapic_lvtpc_enabled(vcpu_vlapic(v)) )
+        apic_write_around(APIC_LVTPC, LVTPC_HVM_PMU);
+    else
+        apic_write_around(APIC_LVTPC, LVTPC_HVM_PMU | APIC_LVT_MASKED);
+
+    core2_vpmu_save_msr_context(v, type, index, msr_content);
+    if ( type != MSR_TYPE_GLOBAL )
+        wrmsrl(ecx, msr_content);
+    else
+        vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+
+    return 1;
+}
+
+static int core2_vpmu_do_rdmsr(struct cpu_user_regs *regs)
+{
+    u64 msr_content = 0;
+    int type = -1, index = -1;
+    struct vcpu *v = current;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core2_vpmu_context *core2_vpmu_cxt = NULL;
+
+    if ( !core2_vpmu_msr_common_check(regs->ecx, &type, &index) )
+        return 0;
+
+    core2_vpmu_cxt = vpmu->context;
+    switch ( regs->ecx )
+    {
+    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+        break;
+    case MSR_CORE_PERF_GLOBAL_STATUS:
+        msr_content = core2_vpmu_cxt->global_ovf_status;
+        break;
+    case MSR_CORE_PERF_GLOBAL_CTRL:
+        vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, &msr_content);
+        break;
+    default:
+        rdmsrl(regs->ecx, msr_content);
+    }
+
+    regs->eax = msr_content & 0xFFFFFFFF;
+    regs->edx = msr_content >> 32;
+    return 1;
+}
+
+static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
+{
+    struct vcpu *v = current;
+    u64 msr_content;
+    u32 vlapic_lvtpc;
+    unsigned char int_vec;
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core2_vpmu_context *core2_vpmu_cxt = vpmu->context;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+
+    rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
+    if ( !msr_content )
+        return 0;
+    core2_vpmu_cxt->global_ovf_status |= msr_content;
+    wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, 0xC000000700000003);
+
+    apic_write_around(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+    if ( !is_vlapic_lvtpc_enabled(vlapic) )
+        return 1;
+
+    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+    int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
+    vlapic_set_reg(vlapic, APIC_LVTPC, vlapic_lvtpc | APIC_LVT_MASKED);
+    if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
+        vlapic_set_irq(vcpu_vlapic(v), int_vec, 0);
+    else
+        test_and_set_bool(v->nmi_pending);
+    return 1;
+}
+
+static void core2_vpmu_initialise(struct vcpu *v)
+{
+}
+
+static void core2_vpmu_destroy(struct vcpu *v)
+{
+    struct vpmu_struct *vpmu = vcpu_vpmu(v);
+    struct core2_vpmu_context *core2_vpmu_cxt = vpmu->context;
+
+    if ( !vpmu->flags & VPMU_CONTEXT_ALLOCATED )
+        return;
+    xfree(core2_vpmu_cxt->pmu_enable);
+    xfree(vpmu->context);
+    if ( cpu_has_vmx_msr_bitmap )
+        core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
+    release_pmu_ownship(PMU_OWNER_HVM);
+}
+
+struct arch_vpmu_ops core2_vpmu_ops = {
+    .do_wrmsr = core2_vpmu_do_wrmsr,
+    .do_rdmsr = core2_vpmu_do_rdmsr,
+    .do_interrupt = core2_vpmu_do_interrupt,
+    .arch_vpmu_initialise = core2_vpmu_initialise,
+    .arch_vpmu_destroy = core2_vpmu_destroy,
+    .arch_vpmu_save = core2_vpmu_save,
+    .arch_vpmu_load = core2_vpmu_load
+};
+
diff -r 966a6d3b7408 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/arch/x86/i8259.c	Mon Dec 17 22:35:58 2007 +0800
@@ -72,6 +72,7 @@ BUILD_SMP_INTERRUPT(apic_timer_interrupt
 BUILD_SMP_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
+BUILD_SMP_INTERRUPT(pmu_apic_interrupt,PMU_APIC_VECTOR)
 BUILD_SMP_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 
 #define IRQ(x,y) \
diff -r 966a6d3b7408 xen/arch/x86/oprofile/nmi_int.c
--- a/xen/arch/x86/oprofile/nmi_int.c	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/arch/x86/oprofile/nmi_int.c	Mon Dec 17 22:35:58 2007 +0800
@@ -291,18 +291,27 @@ static int __init p4_init(char ** cpu_ty
 }
 
 
+extern int ppro_has_global_ctrl;
 static int __init ppro_init(char ** cpu_type)
 {
 	__u8 cpu_model = current_cpu_data.x86_model;
 
-	if (cpu_model > 15) {
-		printk("xenoprof: Initialization failed. "
-		       "Intel processor model %d for P6 class family is not "
-		       "supported\n", cpu_model);
-		return 0;
-	}
-	else if (cpu_model == 15)
-		*cpu_type = "i386/core_2";
+    if (cpu_model > 15 && cpu_model != 23) {
+        printk("xenoprof: Initialization failed. "
+               "Intel processor model %d for P6 class family is not "
+               "supported\n", cpu_model);
+        return 0;
+    }
+    else if (cpu_model == 23)
+    {
+        ppro_has_global_ctrl = 1;
+        *cpu_type = "i386/core_2";
+    }
+    else if (cpu_model == 15)
+    {
+        ppro_has_global_ctrl = 1;
+        *cpu_type = "i386/core_2";
+    }
 	else if (cpu_model == 14)
 		*cpu_type = "i386/core";
 	else if (cpu_model == 9)
diff -r 966a6d3b7408 xen/arch/x86/oprofile/op_model_ppro.c
--- a/xen/arch/x86/oprofile/op_model_ppro.c	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/arch/x86/oprofile/op_model_ppro.c	Mon Dec 17 22:35:58 2007 +0800
@@ -41,6 +41,7 @@
 #define CTRL_SET_EVENT(val, e) (val |= e)
 
 static unsigned long reset_value[NUM_COUNTERS];
+int ppro_has_global_ctrl = 0;
  
 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 {
@@ -134,6 +135,11 @@ static void ppro_start(struct op_msrs co
 			CTRL_WRITE(low, high, msrs, i);
 		}
 	}
+    /* Global Control MSR is enabled by default when system power on.
+     * However, this may not hold true when xenoprof starts to run.
+     */
+    if ( ppro_has_global_ctrl )
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, (1<<NUM_COUNTERS) - 1);
 }
 
 
@@ -149,6 +155,8 @@ static void ppro_stop(struct op_msrs con
 		CTRL_SET_INACTIVE(low);
 		CTRL_WRITE(low, high, msrs, i);
 	}
+    if ( ppro_has_global_ctrl )
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 }
 
 
diff -r 966a6d3b7408 xen/common/xenoprof.c
--- a/xen/common/xenoprof.c	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/common/xenoprof.c	Mon Dec 17 22:35:58 2007 +0800
@@ -23,6 +23,10 @@
 /* Lock protecting the following global state */
 static DEFINE_SPINLOCK(xenoprof_lock);
 
+static DEFINE_SPINLOCK(pmu_owner_lock);
+int pmu_owner = 0;
+int pmu_hvm_refcount = 0;
+
 static struct domain *active_domains[MAX_OPROF_DOMAINS];
 static int active_ready[MAX_OPROF_DOMAINS];
 static unsigned int adomains;
@@ -43,6 +47,37 @@ static u64 passive_samples;
 static u64 passive_samples;
 static u64 idle_samples;
 static u64 others_samples;
+
+int acquire_pmu_ownership(int pmu_ownship)
+{
+    spin_lock(&pmu_owner_lock);
+    if ( pmu_owner == PMU_OWNER_NONE )
+    {
+        pmu_owner = pmu_ownship;
+        goto out;
+    }
+
+    if ( pmu_owner == pmu_ownship )
+        goto out;
+
+    spin_unlock(&pmu_owner_lock);
+    return 0;
+ out:
+    if ( pmu_owner == PMU_OWNER_HVM )
+        pmu_hvm_refcount++;
+    spin_unlock(&pmu_owner_lock);
+    return 1;
+}
+
+void release_pmu_ownship(int pmu_ownship)
+{
+    spin_lock(&pmu_owner_lock);
+    if ( pmu_ownship == PMU_OWNER_HVM )
+        pmu_hvm_refcount--;
+    if ( !pmu_hvm_refcount )
+        pmu_owner = PMU_OWNER_NONE;
+    spin_unlock(&pmu_owner_lock);
+}
 
 int is_active(struct domain *d)
 {
@@ -649,6 +684,11 @@ int do_xenoprof_op(int op, XEN_GUEST_HAN
         break;
 
     case XENOPROF_get_buffer:
+        if ( !acquire_pmu_ownership(PMU_OWNER_XENOPROF) )
+        {
+            ret = -EBUSY;
+            break;
+        }
         ret = xenoprof_op_get_buffer(arg);
         break;
 
@@ -786,6 +826,7 @@ int do_xenoprof_op(int op, XEN_GUEST_HAN
             break;
         x = current->domain->xenoprof;
         unshare_xenoprof_page_with_guest(x);
+        release_pmu_ownship(PMU_OWNER_XENOPROF);
         break;
     }
 
diff -r 966a6d3b7408 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/include/asm-x86/hvm/hvm.h	Mon Dec 17 22:35:58 2007 +0800
@@ -119,6 +119,7 @@ struct hvm_function_table {
     void (*init_hypercall_page)(struct domain *d, void *hypercall_page);
 
     int  (*event_pending)(struct vcpu *v);
+    int  (*do_pmu_interrupt)(struct cpu_user_regs *regs);
 
     int  (*cpu_up)(void);
     void (*cpu_down)(void);
@@ -231,6 +232,11 @@ static inline int hvm_event_pending(stru
 static inline int hvm_event_pending(struct vcpu *v)
 {
     return hvm_funcs.event_pending(v);
+}
+
+static inline int hvm_do_pmu_interrupt(struct cpu_user_regs *regs)
+{
+    return hvm_funcs.do_pmu_interrupt(regs);
 }
 
 /* These reserved bits in lower 32 remain 0 after any load of CR0 */
diff -r 966a6d3b7408 xen/include/asm-x86/hvm/vlapic.h
--- a/xen/include/asm-x86/hvm/vlapic.h	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/include/asm-x86/hvm/vlapic.h	Mon Dec 17 22:35:58 2007 +0800
@@ -71,6 +71,12 @@ static inline void vlapic_set_reg(
     *((uint32_t *)(&vlapic->regs->data[reg])) = val;
 }
 
+static inline int is_vlapic_lvtpc_enabled(struct vlapic *vlapic)
+{
+    return vlapic_enabled(vlapic) &&
+           !(vlapic_get_reg(vlapic, APIC_LVTPC) & APIC_LVT_MASKED);
+}
+
 int vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig);
 
 int vlapic_has_pending_irq(struct vcpu *v);
diff -r 966a6d3b7408 xen/include/asm-x86/hvm/vmx/vmcs.h
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Mon Dec 17 22:35:58 2007 +0800
@@ -22,6 +22,7 @@
 #include <asm/config.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/vmx/cpu.h>
+#include <asm/hvm/vmx/vpmu.h>
 
 #ifdef VMXASSIST
 #include <public/hvm/vmx_assist.h>
@@ -75,6 +76,9 @@ struct arch_vmx_struct {
 
     /* Cache of cpu execution control. */
     u32                  exec_control;
+
+    /* PMU */
+    struct vpmu_struct   vpmu;
 
 #ifdef __x86_64__
     struct vmx_msr_state msr_state;
diff -r 966a6d3b7408 xen/include/asm-x86/hvm/vmx/vpmu.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vpmu.h	Mon Dec 17 22:35:58 2007 +0800
@@ -0,0 +1,83 @@
+/*
+ * vpmu.h: PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#ifndef __ASM_X86_HVM_VPMU_H_
+#define __ASM_X86_HVM_VPMU_H_
+
+#define msraddr_to_bitpos(x) (((x)&0xffff) + ((x)>>31)*0x2000)
+#define vcpu_vpmu(vcpu)   (&(vcpu)->arch.hvm_vcpu.u.vmx.vpmu)
+#define vpmu_vcpu(vpmu)   (container_of((vpmu), struct vcpu, \
+                                          arch.hvm_vcpu.u.vmx.vpmu))
+#define vpmu_domain(vpmu) (vpmu_vcpu(vpmu)->domain)
+
+#define MSR_TYPE_COUNTER            0
+#define MSR_TYPE_CTRL               1
+#define MSR_TYPE_GLOBAL             2
+#define MSR_TYPE_ARCH_COUNTER       3
+#define MSR_TYPE_ARCH_CTRL          4
+
+#define LVTPC_HVM_PMU            0xf8
+
+struct pmumsr {
+    unsigned int num;
+    u32 *msr;
+};
+
+struct msr_load_store_entry {
+    u32 msr_index;
+    u32 msr_reserved;
+    u64 msr_data;
+};
+
+/* Arch specific operations shared by all vpmus */
+struct arch_vpmu_ops {
+    int (*do_wrmsr)(struct cpu_user_regs *regs);
+    int (*do_rdmsr)(struct cpu_user_regs *regs);
+    int (*do_interrupt)(struct cpu_user_regs *regs);
+    void (*arch_vpmu_initialise)(struct vcpu *v);
+    void (*arch_vpmu_destroy)(struct vcpu *v);
+    void (*arch_vpmu_save)(struct vcpu *v);
+    void (*arch_vpmu_load)(struct vcpu *v);
+};
+
+struct vpmu_struct {
+    u32 flags;
+    void *context;
+    struct arch_vpmu_ops *arch_vpmu_ops;
+};
+
+#define VPMU_CONTEXT_ALLOCATED              0x1
+#define VPMU_CONTEXT_LOADED                 0x2
+#define VPMU_RUNNING                        0x4
+
+int inline vpmu_do_wrmsr(struct cpu_user_regs *regs);
+int inline vpmu_do_rdmsr(struct cpu_user_regs *regs);
+int inline vpmu_do_interrupt(struct cpu_user_regs *regs);
+void inline vpmu_initialise(struct vcpu *v);
+void inline vpmu_destroy(struct vcpu *v);
+void inline vpmu_save(struct vcpu *v);
+void inline vpmu_load(struct vcpu *v);
+
+extern int acquire_pmu_ownership(int pmu_ownership);
+extern void release_pmu_ownership(int pmu_ownership);
+
+#endif /* __ASM_X86_HVM_VPMU_H_*/
+
diff -r 966a6d3b7408 xen/include/asm-x86/hvm/vmx/vpmu_core2.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/asm-x86/hvm/vmx/vpmu_core2.h	Mon Dec 17 22:35:58 2007 +0800
@@ -0,0 +1,68 @@
+
+/*
+ * vpmu_core2.h: CORE 2 specific PMU virtualization for HVM domain.
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Haitao Shan <haitao.shan@intel.com>
+ */
+
+#ifndef __ASM_X86_HVM_VPMU_CORE_H_
+#define __ASM_X86_HVM_VPMU_CORE_H_
+
+/* Core 2 Non-architectual Performance Counter MSRs. */
+u32 core2_counters_msr[] =   {
+    MSR_CORE_PERF_FIXED_CTR0,
+    MSR_CORE_PERF_FIXED_CTR1,
+    MSR_CORE_PERF_FIXED_CTR2};
+
+/* Core 2 Non-architectual Performance Control MSRs. */
+u32 core2_ctrls_msr[] = {
+    MSR_CORE_PERF_FIXED_CTR_CTRL,
+    MSR_IA32_PEBS_ENABLE,
+    MSR_IA32_DS_AREA};
+
+struct pmumsr core2_counters = {
+    3,
+    core2_counters_msr
+};
+
+struct pmumsr core2_ctrls = {
+    3,
+    core2_ctrls_msr
+};
+
+struct arch_msr_pair {
+    u64 counter;
+    u64 control;
+};
+
+struct core2_pmu_enable {
+    char fixed_ctr_enable[3];
+    char arch_pmc_enable[1];
+};
+
+struct core2_vpmu_context {
+    struct core2_pmu_enable *pmu_enable;
+    u64 counters[3];
+    u64 ctrls[3];
+    u64 global_ovf_status;
+    u32 hw_lapic_lvtpc;
+    struct arch_msr_pair arch_msr_pair[1];
+};
+
+#endif /* __ASM_X86_HVM_VPMU_CORE_H_ */
+
diff -r 966a6d3b7408 xen/include/asm-x86/irq.h
--- a/xen/include/asm-x86/irq.h	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/include/asm-x86/irq.h	Mon Dec 17 22:35:58 2007 +0800
@@ -28,6 +28,7 @@ fastcall void call_function_interrupt(vo
 fastcall void call_function_interrupt(void);
 fastcall void apic_timer_interrupt(void);
 fastcall void error_interrupt(void);
+fastcall void pmu_apic_interrupt(void);
 fastcall void spurious_interrupt(void);
 fastcall void thermal_interrupt(void);
 
diff -r 966a6d3b7408 xen/include/asm-x86/mach-default/irq_vectors.h
--- a/xen/include/asm-x86/mach-default/irq_vectors.h	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/include/asm-x86/mach-default/irq_vectors.h	Mon Dec 17 22:35:58 2007 +0800
@@ -9,13 +9,14 @@
 #define CALL_FUNCTION_VECTOR	0xfb
 #define THERMAL_APIC_VECTOR	0xfa
 #define LOCAL_TIMER_VECTOR	0xf9
+#define PMU_APIC_VECTOR 	0xf8
 
 /*
  * High-priority dynamically-allocated vectors. For interrupts that
  * must be higher priority than any guest-bound interrupt.
  */
 #define FIRST_HIPRIORITY_VECTOR	0xf0
-#define LAST_HIPRIORITY_VECTOR  0xf8
+#define LAST_HIPRIORITY_VECTOR  0xf7
 
 /* Legacy PIC uses vectors 0xe0-0xef. */
 #define FIRST_LEGACY_VECTOR	0xe0
diff -r 966a6d3b7408 xen/include/xen/xenoprof.h
--- a/xen/include/xen/xenoprof.h	Fri Dec 14 11:50:24 2007 +0000
+++ b/xen/include/xen/xenoprof.h	Mon Dec 17 22:35:58 2007 +0800
@@ -69,4 +69,10 @@ int xenoprof_add_trace(struct domain *d,
 int xenoprof_add_trace(struct domain *d, struct vcpu *v, 
                        unsigned long eip, int mode);
 
+#define PMU_OWNER_NONE          0
+#define PMU_OWNER_XENOPROF      1
+#define PMU_OWNER_HVM           2
+int acquire_pmu_ownship(int pmu_ownership);
+void release_pmu_ownship(int pmu_ownership);
+
 #endif  /* __XEN__XENOPROF_H__ */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: Re: [PATCH] Enable Core 2 Duo PerformanceCountersinHVM guest
  2007-12-17  7:22                     ` Re: [PATCH] Enable Core 2 Duo PerformanceCountersinHVM guest Shan, Haitao
@ 2007-12-17  8:55                       ` Keir Fraser
  0 siblings, 0 replies; 14+ messages in thread
From: Keir Fraser @ 2007-12-17  8:55 UTC (permalink / raw)
  To: Shan, Haitao, Otavio Salvador; +Cc: xen-devel, Jiang, Yunhong


[-- Attachment #1.1: Type: text/plain, Size: 2547 bytes --]

I will. This isn’t for 3.2.0 though, so I may not look until after the 3.2.0
branch.

 -- Keir

On 17/12/07 07:22, "Shan, Haitao" <haitao.shan@intel.com> wrote:

> Hi, Keir, 
> ? 
> I have baked an updated patch. In this patch, I changed some logic?to shorten
> the lengthy part of core2_vpmu_do_wrmsr. And also, I did a rebase, since
> there?is?one?new changeset recently which made changes to xenoprofile.
> 
> Can you have a look?
>  <<pmu.patch>> 
> Best Regards 
> Haitao Shan 
> 
> Shan, Haitao wrote:
>> > Hi, Salvador, 
>> > 
>> > Do you mean the strange indentation around CTRL_WRITE? That is
>> > because there are three hard TABs before CTRL_WRITE. I am not sure
>> > whether I should fix the original code, so I just leave them there
>> > untouched.   
>> > 
>> > Haitao Shan 
>> > 
>> > -----Original Message-----
>> > From: Otavio Salvador [mailto:otavio@ossystems.com.br
>> <mailto:otavio@ossystems.com.br> ]
>> > Sent: 2007年12月15日 3:05
>> > To: Shan, Haitao
>> > Cc: Keir Fraser; xen-devel@lists.xensource.com; Jiang, Yunhong
>> > Subject: Re: [Xen-devel] Re: [PATCH] Enable Core 2 Duo
>> > PerformanceCounters inHVM guest
>> > 
>> > "Shan, Haitao" <haitao.shan@intel.com> writes:
>> > 
>> > Hello Haitao, 
>> > 
>> > I've done a look on the patch and found two places with wrong coding
>> > style. Take a look bellow:
>> > 
>>> >> diff -r 8f0cbfc478d6 xen/arch/x86/oprofile/op_model_ppro.c
>>> >> --- a/xen/arch/x86/oprofile/op_model_ppro.c  Thu Dec 13 09:31:03 2007
>>> >> +0000 +++ b/xen/arch/x86/oprofile/op_model_ppro.c    Fri Dec 14
>>> >> 07:21:43 2007 +0800 @@ -126,6 +127,11 @@ static void
>>> >>      ppro_start(struct op_msrs co CTRL_READ(low, high, msrs, 0);
>>> >>      CTRL_SET_ACTIVE(low);
>>> >>      CTRL_WRITE(low, high, msrs, 0);
>>> >> +    /* Global Control MSR is enabled by default when system power
>>> >> on. +     * However, this may not hold true when xenoprof starts to
>>> >> run. +     */ +    if ( ppro_has_global_ctrl )
>>> >> +        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 1);
>>> >>  } 
>>> >> 
>>> >> 
>>> >> @@ -135,6 +141,8 @@ static void ppro_stop(struct op_msrs con
>>> >>      CTRL_READ(low, high, msrs, 0);
>>> >>      CTRL_SET_INACTIVE(low);
>>> >>      CTRL_WRITE(low, high, msrs, 0);
>>> >> +    if ( ppro_has_global_ctrl )
>>> >> +        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
>>> >>  } 
>> > 
>> > Cheers, 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel



[-- Attachment #1.2: Type: text/html, Size: 4722 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2007-12-17  8:55 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-12-11 10:06 [PATCH] Enable Core 2 Duo Performance Counters in HVM guest Shan, Haitao
2007-12-11 10:37 ` Keir Fraser
2007-12-11 13:32   ` Shan, Haitao
2007-12-11 15:01     ` Keir Fraser
2007-12-14  7:49       ` Re: [PATCH] Enable Core 2 Duo Performance Counters inHVM guest Shan, Haitao
2007-12-14  8:31         ` Keir Fraser
2007-12-14  9:32           ` Shan, Haitao
2007-12-14  9:54             ` Keir Fraser
2007-12-14 10:01               ` Re: [PATCH] Enable Core 2 Duo PerformanceCounters " Shan, Haitao
2007-12-14 15:43               ` Shan, Haitao
2007-12-14 19:05                 ` Otavio Salvador
2007-12-15  1:09                   ` Shan, Haitao
2007-12-17  7:22                     ` Re: [PATCH] Enable Core 2 Duo PerformanceCountersinHVM guest Shan, Haitao
2007-12-17  8:55                       ` Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.