[PATCH v8 18/19] x86/VPMU: NMI-based VPMU support

xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed

From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: jbeulich@suse.com, kevin.tian@intel.com, suravee.suthikulpanit@amd.com
Cc: keir@xen.org, andrew.cooper3@citrix.com, tim@xen.org,
	dietmar.hahn@ts.fujitsu.com, xen-devel@lists.xen.org,
	jun.nakajima@intel.com, boris.ostrovsky@oracle.com
Subject: [PATCH v8 18/19] x86/VPMU: NMI-based VPMU support
Date: Tue,  1 Jul 2014 10:37:59 -0400	[thread overview]
Message-ID: <1404225480-2664-19-git-send-email-boris.ostrovsky@oracle.com> (raw)
In-Reply-To: <1404225480-2664-1-git-send-email-boris.ostrovsky@oracle.com>

Add support for using NMIs as PMU interrupts.

Most of the processing is still performed by vpmu_do_interrupt(). However, since
certain operations are not NMI-safe we defer them to a softint that vpmu_do_interrupt()
will schedule:
* For PV guests that would be send_guest_vcpu_virq()
* For HVM guests it's VLAPIC accesses and hvm_get_segment_register() (the latter
can be called in privileged profiling mode when the interrupted guest is an HVM one).

With send_guest_vcpu_virq() and hvm_get_segment_register() for PV(H) and vlapic
accesses for HVM moved to softint, the only routines/macros that vpmu_do_interrupt()
calls in NMI mode are:
* memcpy()
* querying domain type (is_XX_domain())
* guest_cpu_user_regs()
* XLAT_cpu_user_regs()
* raise_softirq()
* vcpu_vpmu()
* vpmu_ops->arch_vpmu_save()
* vpmu_ops->do_interrupt() (in the future for PVH support)

The latter two only access PMU MSRs with {rd,wr}msrl() (not the _safe versions
which would not be NMI-safe).

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Kevin Tian <kevint.tian@intel.com>
Reviewed-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
Tested-by: Dietmar Hahn <dietmar.hahn@ts.fujitsu.com>
---
 xen/arch/x86/hvm/svm/vpmu.c       |   3 +-
 xen/arch/x86/hvm/vmx/vpmu_core2.c |   3 +-
 xen/arch/x86/hvm/vpmu.c           | 190 +++++++++++++++++++++++++++++++-------
 xen/include/asm-x86/hvm/vpmu.h    |   4 +-
 xen/include/xen/softirq.h         |   1 +
 5 files changed, 163 insertions(+), 38 deletions(-)

diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 22bbbea..56f8469 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -165,7 +165,7 @@ static void amd_vpmu_unset_msr_bitmap(struct vcpu *v)
     msr_bitmap_off(vpmu);
 }
 
-static int amd_vpmu_do_interrupt(struct cpu_user_regs *regs)
+static int amd_vpmu_do_interrupt(const struct cpu_user_regs *regs)
 {
     return 1;
 }
@@ -220,6 +220,7 @@ static inline void context_save(struct vcpu *v)
         rdmsrl(counters[i], counter_regs[i]);
 }
 
+/* Must be NMI-safe */
 static int amd_vpmu_save(struct vcpu *v)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index d902fb1..1304699 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -303,6 +303,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
 }
 
+/* Must be NMI-safe */
 static int core2_vpmu_save(struct vcpu *v)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
@@ -713,7 +714,7 @@ static void core2_vpmu_dump(const struct vcpu *v)
     }
 }
 
-static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
+static int core2_vpmu_do_interrupt(const struct cpu_user_regs *regs)
 {
     struct vcpu *v = current;
     u64 msr_content;
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index 87559fe..7ffd005 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -36,6 +36,7 @@
 #include <asm/hvm/svm/svm.h>
 #include <asm/hvm/svm/vmcb.h>
 #include <asm/apic.h>
+#include <asm/nmi.h>
 #include <public/pmu.h>
 #include <xen/tasklet.h>
 
@@ -55,27 +56,53 @@ uint64_t __read_mostly vpmu_features = 0;
 static void parse_vpmu_param(char *s);
 custom_param("vpmu", parse_vpmu_param);
 
+static void pmu_softnmi(void);
+
 static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
+static DEFINE_PER_CPU(struct vcpu *, sampled_vcpu);
+
+static uint32_t __read_mostly vpmu_interrupt_type = PMU_APIC_VECTOR;
 
 static void __init parse_vpmu_param(char *s)
 {
-    switch ( parse_bool(s) )
-    {
-    case 0:
-        break;
-    default:
-        if ( !strcmp(s, "bts") )
-            vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
-        else if ( *s )
+    char *ss;
+
+    vpmu_mode = XENPMU_MODE_SELF;
+    if (*s == '\0')
+        return;
+
+    do {
+        ss = strchr(s, ',');
+        if ( ss )
+            *ss = '\0';
+
+        switch  ( parse_bool(s) )
         {
-            printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
+        case 0:
+            vpmu_mode = XENPMU_MODE_OFF;
+            return;
+        case -1:
+            if ( !strcmp(s, "nmi") )
+                vpmu_interrupt_type = APIC_DM_NMI;
+            else if ( !strcmp(s, "bts") )
+                vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
+            else if ( !strcmp(s, "all") )
+            {
+                vpmu_mode &= ~XENPMU_MODE_SELF;
+                vpmu_mode |= XENPMU_MODE_ALL;
+            }
+            else
+            {
+                printk("VPMU: unknown flag: %s - vpmu disabled!\n", s);
+                vpmu_mode = XENPMU_MODE_OFF;
+                return;
+            }
+        default:
             break;
         }
-        /* fall through */
-    case 1:
-        vpmu_mode = XENPMU_MODE_SELF;
-        break;
-    }
+
+        s = ss + 1;
+    } while ( ss );
 }
 
 void vpmu_lvtpc_update(uint32_t val)
@@ -83,7 +110,7 @@ void vpmu_lvtpc_update(uint32_t val)
     struct vcpu *v = current;
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
 
-    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
+    vpmu->hw_lapic_lvtpc = vpmu_interrupt_type | (val & APIC_LVT_MASKED);
 
     /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
     if ( is_hvm_domain(v->domain) ||
@@ -91,6 +118,24 @@ void vpmu_lvtpc_update(uint32_t val)
         apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
 }
 
+static void vpmu_send_interrupt(struct vcpu *v)
+{
+    struct vlapic *vlapic;
+    u32 vlapic_lvtpc;
+
+    ASSERT( is_hvm_vcpu(v) );
+
+    vlapic = vcpu_vlapic(v);
+    if ( !is_vlapic_lvtpc_enabled(vlapic) )
+        return;
+
+    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
+    if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
+        vlapic_set_irq(vcpu_vlapic(v), vlapic_lvtpc & APIC_VECTOR_MASK, 0);
+    else
+        v->nmi_pending = 1;
+}
+
 int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, uint8_t rw)
 {
     struct vcpu *v = current;
@@ -151,7 +196,8 @@ static struct vcpu *choose_hwdom_vcpu(void)
     return v;
 }
 
-int vpmu_do_interrupt(struct cpu_user_regs *regs)
+/* This routine may be called in NMI context */
+int vpmu_do_interrupt(const struct cpu_user_regs *regs)
 {
     struct vcpu *curr_vcpu, *v = current;
     struct vpmu_struct *vpmu;
@@ -232,8 +278,9 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
                 if ( current->arch.flags & TF_kernel_mode )
                     v->arch.vpmu.xenpmu_data->pmu.r.regs.cs &= ~3;
             }
-            else
+            else if ( !(vpmu_interrupt_type & APIC_DM_NMI) )
             {
+                /* Unsafe in NMI context, defer to softint later */
                 struct segment_register seg_cs;
 
                 hvm_get_segment_register(current, x86_seg_cs, &seg_cs);
@@ -249,8 +296,12 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
             memcpy(&v->arch.vpmu.xenpmu_data->pmu.r.regs,
                    gregs, sizeof(struct cpu_user_regs));
 
-            hvm_get_segment_register(current, x86_seg_cs, &seg_cs);
-            v->arch.vpmu.xenpmu_data->pmu.r.regs.cs = seg_cs.sel;
+            /* This is unsafe in NMI context, we'll do it in softint handler */
+            if ( !(vpmu_interrupt_type & APIC_DM_NMI ) )
+            {
+                hvm_get_segment_register(current, x86_seg_cs, &seg_cs);
+                v->arch.vpmu.xenpmu_data->pmu.r.regs.cs = seg_cs.sel;
+            }
         }
 
         vpmu->xenpmu_data->domain_id = (curr_vcpu == v) ?
@@ -263,30 +314,30 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
         apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
         vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
 
-        send_guest_vcpu_virq(v, VIRQ_XENPMU);
+        if ( vpmu_interrupt_type & APIC_DM_NMI )
+        {
+            per_cpu(sampled_vcpu, smp_processor_id()) = current;
+            raise_softirq(PMU_SOFTIRQ);
+        }
+        else
+            send_guest_vcpu_virq(v, VIRQ_XENPMU);
 
         return 1;
     }
 
     if ( vpmu->arch_vpmu_ops )
     {
-        struct vlapic *vlapic = vcpu_vlapic(v);
-        u32 vlapic_lvtpc;
-        unsigned char int_vec;
-
         if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) )
             return 0;
 
-        if ( !is_vlapic_lvtpc_enabled(vlapic) )
-            return 1;
-
-        vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);
-        int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
-
-        if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
-            vlapic_set_irq(vcpu_vlapic(v), int_vec, 0);
+        if ( vpmu_interrupt_type & APIC_DM_NMI )
+        {
+            per_cpu(sampled_vcpu, smp_processor_id()) = current;
+            raise_softirq(PMU_SOFTIRQ);
+        }
         else
-            v->nmi_pending = 1;
+            vpmu_send_interrupt(v);
+
         return 1;
     }
 
@@ -317,6 +368,8 @@ static void vpmu_save_force(void *arg)
     vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
 
     per_cpu(last_vcpu, smp_processor_id()) = NULL;
+
+    pmu_softnmi();
 }
 
 void vpmu_save(struct vcpu *v)
@@ -334,7 +387,10 @@ void vpmu_save(struct vcpu *v)
         if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
             vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
 
-    apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+    apic_write(APIC_LVTPC, vpmu_interrupt_type | APIC_LVT_MASKED);
+
+    /* Make sure there are no outstanding PMU NMIs */
+    pmu_softnmi();
 }
 
 void vpmu_load(struct vcpu *v)
@@ -379,6 +435,8 @@ void vpmu_load(struct vcpu *v)
         vpmu_save_force(prev);
         vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
 
+        pmu_softnmi();
+
         vpmu = vcpu_vpmu(v);
     }
 
@@ -445,11 +503,55 @@ void vpmu_destroy(struct vcpu *v)
     }
 }
 
+/* Process the softirq set by PMU NMI handler */
+static void pmu_softnmi(void)
+{
+    struct vcpu *v, *sampled = this_cpu(sampled_vcpu);
+
+    if ( sampled == NULL )
+        return;
+    this_cpu(sampled_vcpu) = NULL;
+
+    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
+         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
+    {
+            v = choose_hwdom_vcpu();
+            if ( !v )
+                return;
+    }
+    else
+    {
+        if ( is_hvm_domain(sampled->domain) )
+        {
+            vpmu_send_interrupt(sampled);
+            return;
+        }
+        v = sampled;
+    }
+
+    if ( has_hvm_container_domain(sampled->domain) )
+    {
+        struct segment_register seg_cs;
+
+        hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
+        v->arch.vpmu.xenpmu_data->pmu.r.regs.cs = seg_cs.sel;
+    }
+
+    send_guest_vcpu_virq(v, VIRQ_XENPMU);
+}
+
+int pmu_nmi_interrupt(const struct cpu_user_regs *regs, int cpu)
+{
+    return vpmu_do_interrupt(regs);
+}
+
 static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
 {
     struct vcpu *v;
     struct page_info *page;
     uint64_t gfn = params->val;
+    static bool_t __read_mostly pvpmu_initted = 0;
+    static DEFINE_SPINLOCK(init_lock);
 
     if ( (params->vcpu >= d->max_vcpus) || (d->vcpu == NULL) ||
          (d->vcpu[params->vcpu] == NULL) )
@@ -473,6 +575,26 @@ static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
         return -EINVAL;
     }
 
+    spin_lock(&init_lock);
+
+    if ( !pvpmu_initted )
+    {
+        if ( reserve_lapic_nmi() == 0 )
+            set_nmi_callback(pmu_nmi_interrupt);
+        else
+        {
+            spin_unlock(&init_lock);
+            printk("Failed to reserve PMU NMI\n");
+            put_page(page);
+            return -EBUSY;
+        }
+        open_softirq(PMU_SOFTIRQ, pmu_softnmi);
+
+        pvpmu_initted = 1;
+    }
+
+    spin_unlock(&init_lock);
+
     vpmu_initialise(v);
 
     return 0;
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index 0fc5317..026c425 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -41,7 +41,7 @@
 struct arch_vpmu_ops {
     int (*do_wrmsr)(unsigned int msr, uint64_t msr_content);
     int (*do_rdmsr)(unsigned int msr, uint64_t *msr_content);
-    int (*do_interrupt)(struct cpu_user_regs *regs);
+    int (*do_interrupt)(const struct cpu_user_regs *regs);
     void (*do_cpuid)(unsigned int input,
                      unsigned int *eax, unsigned int *ebx,
                      unsigned int *ecx, unsigned int *edx);
@@ -99,7 +99,7 @@ static inline bool_t vpmu_are_all_set(const struct vpmu_struct *vpmu,
 
 void vpmu_lvtpc_update(uint32_t val);
 int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, uint8_t rw);
-int vpmu_do_interrupt(struct cpu_user_regs *regs);
+int vpmu_do_interrupt(const struct cpu_user_regs *regs);
 void vpmu_do_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
                                        unsigned int *ecx, unsigned int *edx);
 void vpmu_initialise(struct vcpu *v);
diff --git a/xen/include/xen/softirq.h b/xen/include/xen/softirq.h
index 0c0d481..5829fa4 100644
--- a/xen/include/xen/softirq.h
+++ b/xen/include/xen/softirq.h
@@ -8,6 +8,7 @@ enum {
     NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ,
     RCU_SOFTIRQ,
     TASKLET_SOFTIRQ,
+    PMU_SOFTIRQ,
     NR_COMMON_SOFTIRQS
 };
 
-- 
1.8.1.4

next prev parent reply	other threads:[~2014-07-01 14:37 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-01 14:37 [PATCH v8 00/19] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
2014-07-01 14:37 ` [PATCH v8 01/19] common/symbols: Export hypervisor symbols to privileged guest Boris Ostrovsky
2014-07-01 15:06   ` Jan Beulich
2014-07-01 14:37 ` [PATCH v8 02/19] x86/VPMU: Set MSR bitmaps only for HVM/PVH guests Boris Ostrovsky
2014-07-28 13:48   ` Jan Beulich
2014-07-01 14:37 ` [PATCH v8 03/19] x86/VPMU: Make vpmu macros a bit more efficient Boris Ostrovsky
2014-07-01 14:37 ` [PATCH v8 04/19] intel/VPMU: Clean up Intel VPMU code Boris Ostrovsky
2014-07-28 14:00   ` Jan Beulich
2014-07-28 16:20     ` Boris Ostrovsky
2014-07-01 14:37 ` [PATCH v8 05/19] vmx: Merge MSR management routines Boris Ostrovsky
2014-07-28 14:08   ` Jan Beulich
2014-07-01 14:37 ` [PATCH v8 06/19] x86/VPMU: Handle APIC_LVTPC accesses Boris Ostrovsky
2014-07-01 14:37 ` [PATCH v8 07/19] intel/VPMU: MSR_CORE_PERF_GLOBAL_CTRL should be initialized to zero Boris Ostrovsky
2014-07-01 14:37 ` [PATCH v8 08/19] x86/VPMU: Add public xenpmu.h Boris Ostrovsky
2014-07-28 14:21   ` Jan Beulich
2014-07-01 14:37 ` [PATCH v8 09/19] x86/VPMU: Make vpmu not HVM-specific Boris Ostrovsky
2014-07-01 14:37 ` [PATCH v8 10/19] x86/VPMU: Interface for setting PMU mode and flags Boris Ostrovsky
2014-07-28 15:22   ` Jan Beulich
2014-07-28 16:29     ` Boris Ostrovsky
2014-07-28 16:36       ` Jan Beulich
2014-07-28 17:13         ` Boris Ostrovsky
2014-07-29  6:19           ` Jan Beulich
2014-07-29 14:31             ` Boris Ostrovsky
2014-07-29 15:21               ` Boris Ostrovsky
2014-07-01 14:37 ` [PATCH v8 11/19] x86/VPMU: Initialize PMU for PV(H) guests Boris Ostrovsky
2014-07-01 14:37 ` [PATCH v8 12/19] x86/VPMU: When handling MSR accesses, leave fault injection to callers Boris Ostrovsky
2014-07-28 16:26   ` Jan Beulich
2014-07-01 14:37 ` [PATCH v8 13/19] x86/VPMU: Add support for PMU register handling on PV guests Boris Ostrovsky
2014-07-28 16:33   ` Jan Beulich
2014-07-01 14:37 ` [PATCH v8 14/19] x86/VPMU: Handle PMU interrupts for " Boris Ostrovsky
2014-07-29  7:39   ` Jan Beulich
2014-07-01 14:37 ` [PATCH v8 15/19] x86/VPMU: Merge vpmu_rdmsr and vpmu_wrmsr Boris Ostrovsky
2014-07-29  7:46   ` Jan Beulich
2014-07-29 14:35     ` Boris Ostrovsky
2014-07-01 14:37 ` [PATCH v8 16/19] x86/VPMU: Add privileged PMU mode Boris Ostrovsky
2014-07-29  7:55   ` Jan Beulich
2014-07-29 14:49     ` Boris Ostrovsky
2014-07-01 14:37 ` [PATCH v8 17/19] x86/VPMU: Save VPMU state for PV guests during context switch Boris Ostrovsky
2014-07-01 14:37 ` Boris Ostrovsky [this message]
2014-07-29  8:03   ` [PATCH v8 18/19] x86/VPMU: NMI-based VPMU support Jan Beulich
2014-07-01 14:38 ` [PATCH v8 19/19] x86/VPMU: Move VPMU files up from hvm/ directory Boris Ostrovsky
2014-07-29  8:07 ` [PATCH v8 00/19] x86/PMU: Xen PMU PV(H) support Jan Beulich
2014-07-29 15:00   ` Boris Ostrovsky

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:22bbbea dfblob:56f8469 dfblob:d902fb1 dfblob:1304699
dfblob:87559fe dfblob:7ffd005 dfblob:0fc5317 dfblob:026c425
dfblob:0c0d481 dfblob:5829fa4 )
 OR (
bs:"[PATCH v8 18/19] x86/VPMU: NMI-based VPMU support" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1404225480-2664-19-git-send-email-boris.ostrovsky@oracle.com \
    --to=boris.ostrovsky@oracle.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=dietmar.hahn@ts.fujitsu.com \
    --cc=jbeulich@suse.com \
    --cc=jun.nakajima@intel.com \
    --cc=keir@xen.org \
    --cc=kevin.tian@intel.com \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=tim@xen.org \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).