From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: xen-devel@lists.xen.org
Cc: jun.nakajima@intel.com, JBeulich@suse.com,
George.Dunlap@eu.citrix.com, jacob.shin@amd.com,
eddie.dong@intel.com, dietmar.hahn@ts.fujitsu.com,
suravee.suthikulpanit@amd.com,
Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: [PATCH v2 11/13] x86/PMU: Handle PMU interrupts for PV guests
Date: Fri, 20 Sep 2013 05:42:10 -0400 [thread overview]
Message-ID: <1379670132-1748-12-git-send-email-boris.ostrovsky@oracle.com> (raw)
In-Reply-To: <1379670132-1748-1-git-send-email-boris.ostrovsky@oracle.com>
Add support for handling PMU interrupts for PV guests, and deliver these
interrupts as NMIs instead of using the PMU_APIC_VECTOR vector. Depending on
vpmu_mode, forward the interrupts to the appropriate guest (mode is VPMU_ON)
or to dom0 (VPMU_DOM0).
The VPMU for the interrupted VCPU is unloaded until the guest issues the
XENPMU_flush hypercall. This allows the guest to access PMU MSR values that
are stored in the VPMU context, which is shared between the hypervisor and the
domain, thus avoiding traps to the hypervisor.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/arch/x86/apic.c | 13 ---
xen/arch/x86/hvm/svm/vpmu.c | 8 +-
xen/arch/x86/hvm/vmx/vpmu_core2.c | 8 +-
xen/arch/x86/hvm/vpmu.c | 111 +++++++++++++++++++++++--
xen/include/asm-x86/hvm/vpmu.h | 1 +
xen/include/asm-x86/irq.h | 1 -
xen/include/asm-x86/mach-default/irq_vectors.h | 1 -
xen/include/public/xenpmu.h | 1 +
8 files changed, 115 insertions(+), 29 deletions(-)
diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c
index a52a0e8..9675e76 100644
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -125,9 +125,6 @@ void __init apic_intr_init(void)
/* IPI vectors for APIC spurious and error interrupts */
set_direct_apic_vector(SPURIOUS_APIC_VECTOR, spurious_interrupt);
set_direct_apic_vector(ERROR_APIC_VECTOR, error_interrupt);
-
- /* Performance Counters Interrupt */
- set_direct_apic_vector(PMU_APIC_VECTOR, pmu_apic_interrupt);
}
/* Using APIC to generate smp_local_timer_interrupt? */
@@ -1368,16 +1365,6 @@ void error_interrupt(struct cpu_user_regs *regs)
}
/*
- * This interrupt handles performance counters interrupt
- */
-
-void pmu_apic_interrupt(struct cpu_user_regs *regs)
-{
- ack_APIC_irq();
- vpmu_do_interrupt(regs);
-}
-
-/*
* This initializes the IO-APIC and APIC hardware if this is
* a UP kernel.
*/
diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index 527a1de..3993a95 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -283,8 +283,8 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
return 1;
vpmu_set(vpmu, VPMU_RUNNING);
- apic_write(APIC_LVTPC, PMU_APIC_VECTOR);
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR;
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
+ vpmu->hw_lapic_lvtpc = APIC_DM_NMI;
if ( is_hvm_domain(v->domain) &&
!((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set )
@@ -295,8 +295,8 @@ static int amd_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
if ( (get_pmu_reg_type(msr) == MSR_TYPE_CTRL) &&
(is_pmu_enabled(msr_content) == 0) && vpmu_is_set(vpmu, VPMU_RUNNING) )
{
- apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
+ apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED);
+ vpmu->hw_lapic_lvtpc = APIC_DM_NMI | APIC_LVT_MASKED;
vpmu_reset(vpmu, VPMU_RUNNING);
if ( is_hvm_domain(v->domain) &&
((struct amd_vpmu_context *)vpmu->context)->msr_bitmap_set )
diff --git a/xen/arch/x86/hvm/vmx/vpmu_core2.c b/xen/arch/x86/hvm/vmx/vpmu_core2.c
index ebbb516..27f0807 100644
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c
@@ -548,13 +548,13 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
if ( vpmu_is_set(vpmu, VPMU_RUNNING) &&
is_vlapic_lvtpc_enabled(vcpu_vlapic(v)) )
{
- apic_write_around(APIC_LVTPC, PMU_APIC_VECTOR);
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR;
+ apic_write_around(APIC_LVTPC, APIC_DM_NMI);
+ vpmu->hw_lapic_lvtpc = APIC_DM_NMI;
}
else
{
- apic_write_around(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
+ apic_write_around(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED);
+ vpmu->hw_lapic_lvtpc = APIC_DM_NMI | APIC_LVT_MASKED;
}
}
diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
index 4638193..1ea3a96 100644
--- a/xen/arch/x86/hvm/vpmu.c
+++ b/xen/arch/x86/hvm/vpmu.c
@@ -47,6 +47,7 @@ uint32_t __read_mostly vpmu_mode = XENPMU_MODE_OFF;
static void parse_vpmu_param(char *s);
custom_param("vpmu", parse_vpmu_param);
+static void vpmu_save_force(void *arg);
static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
static void __init parse_vpmu_param(char *s)
@@ -74,7 +75,7 @@ static void vpmu_lvtpc_update(uint32_t val)
{
struct vpmu_struct *vpmu = vcpu_vpmu(current);
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
+ vpmu->hw_lapic_lvtpc = APIC_DM_NMI | (val & APIC_LVT_MASKED);
apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
}
@@ -82,6 +83,9 @@ int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content)
{
struct vpmu_struct *vpmu = vcpu_vpmu(current);
+ if ( (vpmu_mode & XENPMU_MODE_PRIV) && (current->domain != dom0) )
+ return 0;
+
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content);
return 0;
@@ -91,6 +95,9 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
{
struct vpmu_struct *vpmu = vcpu_vpmu(current);
+ if ( (vpmu_mode & XENPMU_MODE_PRIV) && (current->domain != dom0) )
+ return 0;
+
if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
return 0;
@@ -99,17 +106,97 @@ int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
int vpmu_do_interrupt(struct cpu_user_regs *regs)
{
struct vcpu *v = current;
- struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct vpmu_struct *vpmu;
- if ( vpmu->arch_vpmu_ops )
+
+ /* dom0 will handle this interrupt */
+ if ( (vpmu_mode & XENPMU_MODE_PRIV) ||
+ (v->domain->domain_id >= DOMID_FIRST_RESERVED) )
+ {
+ if ( smp_processor_id() >= dom0->max_vcpus )
+ return 0;
+ v = dom0->vcpu[smp_processor_id()];
+ }
+
+ vpmu = vcpu_vpmu(v);
+ if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
+ return 0;
+
+ if ( !is_hvm_domain(v->domain) || (vpmu_mode & XENPMU_MODE_PRIV) )
+ {
+ /* PV guest or dom0 is doing system profiling */
+ void *p;
+ struct cpu_user_regs *gregs;
+
+ p = &v->arch.vpmu.xenpmu_data->pmu.regs;
+
+ /* PV guest will be reading PMU MSRs from xenpmu_data */
+ vpmu_save_force(v);
+
+ /* Store appropriate registers in xenpmu_data
+ *
+ * Note: '!current->is_running' is possible when 'set_current(next)'
+ * for the (HVM) guest has been called but 'reset_stack_and_jump()'
+ * has not (i.e. the guest is not actually running yet).
+ */
+ if ( !is_hvm_domain(current->domain) ||
+ ((vpmu_mode & XENPMU_MODE_PRIV) && !current->is_running) )
+ {
+ /*
+ * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
+ * and therefore we treat it the same way as a non-privileged
+ * PV 32-bit domain.
+ */
+ if ( is_pv_32bit_domain(current->domain) )
+ {
+ struct compat_cpu_user_regs cmp;
+
+ gregs = guest_cpu_user_regs();
+ XLAT_cpu_user_regs(&cmp, gregs);
+ memcpy(p, &cmp, sizeof(struct compat_cpu_user_regs));
+ }
+ else if ( (current->domain != dom0) && !is_idle_vcpu(current) &&
+ !(vpmu_mode & XENPMU_MODE_PRIV) )
+ {
+ /* PV guest */
+ gregs = guest_cpu_user_regs();
+ memcpy(p, gregs, sizeof(struct cpu_user_regs));
+ }
+ else
+ memcpy(p, regs, sizeof(struct cpu_user_regs));
+ }
+ else
+ {
+ /* HVM guest */
+ struct segment_register cs;
+
+ gregs = guest_cpu_user_regs();
+ hvm_get_segment_register(current, x86_seg_cs, &cs);
+ gregs->cs = cs.attr.fields.dpl;
+
+ memcpy(p, gregs, sizeof(struct cpu_user_regs));
+ }
+
+ v->arch.vpmu.xenpmu_data->domain_id = current->domain->domain_id;
+ v->arch.vpmu.xenpmu_data->vcpu_id = current->vcpu_id;
+ v->arch.vpmu.xenpmu_data->pcpu_id = smp_processor_id();
+
+ raise_softirq(PMU_SOFTIRQ);
+ vpmu_set(vpmu, VPMU_WAIT_FOR_FLUSH);
+
+ return 1;
+ }
+ else if ( vpmu->arch_vpmu_ops )
{
- struct vlapic *vlapic = vcpu_vlapic(v);
+ /* HVM guest */
+ struct vlapic *vlapic;
u32 vlapic_lvtpc;
unsigned char int_vec;
if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) )
return 0;
+ vlapic = vcpu_vlapic(v);
if ( !is_vlapic_lvtpc_enabled(vlapic) )
return 1;
@@ -169,7 +256,7 @@ void vpmu_save(struct vcpu *v)
if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v) )
vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
- apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+ apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED);
}
void vpmu_load(struct vcpu *v)
@@ -223,7 +310,13 @@ void vpmu_load(struct vcpu *v)
vpmu->arch_vpmu_ops->arch_vpmu_load(v);
}
- vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
+ /*
+ * PMU interrupt may happen while loading the context above. That
+ * may cause vpmu_save_force() in the handler so we don't
+ * want to mark the context as loaded.
+ */
+ if ( !vpmu_is_set(vpmu, VPMU_WAIT_FOR_FLUSH) )
+ vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
}
void vpmu_initialise(struct vcpu *v)
@@ -444,6 +537,12 @@ long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg)
vpmu_lvtpc_update((uint32_t)pmu_params.val);
ret = 0;
break;
+
+ case XENPMU_flush:
+ vpmu_reset(vcpu_vpmu(current), VPMU_WAIT_FOR_FLUSH);
+ vpmu_load(current);
+ ret = 0;
+ break;
}
return ret;
diff --git a/xen/include/asm-x86/hvm/vpmu.h b/xen/include/asm-x86/hvm/vpmu.h
index e046afd..348fc9a 100644
--- a/xen/include/asm-x86/hvm/vpmu.h
+++ b/xen/include/asm-x86/hvm/vpmu.h
@@ -68,6 +68,7 @@ struct vpmu_struct {
#define VPMU_CONTEXT_SAVE 0x8 /* Force context save */
#define VPMU_FROZEN 0x10 /* Stop counters while VCPU is not running */
#define VPMU_PASSIVE_DOMAIN_ALLOCATED 0x20
+#define VPMU_WAIT_FOR_FLUSH 0x40 /* PV guest waits for XENPMU_flush */
#define vpmu_set(_vpmu, _x) ((_vpmu)->flags |= (_x))
#define vpmu_reset(_vpmu, _x) ((_vpmu)->flags &= ~(_x))
diff --git a/xen/include/asm-x86/irq.h b/xen/include/asm-x86/irq.h
index 7f5da06..e582a72 100644
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -88,7 +88,6 @@ void invalidate_interrupt(struct cpu_user_regs *regs);
void call_function_interrupt(struct cpu_user_regs *regs);
void apic_timer_interrupt(struct cpu_user_regs *regs);
void error_interrupt(struct cpu_user_regs *regs);
-void pmu_apic_interrupt(struct cpu_user_regs *regs);
void spurious_interrupt(struct cpu_user_regs *regs);
void irq_move_cleanup_interrupt(struct cpu_user_regs *regs);
diff --git a/xen/include/asm-x86/mach-default/irq_vectors.h b/xen/include/asm-x86/mach-default/irq_vectors.h
index 992e00c..46dcfaf 100644
--- a/xen/include/asm-x86/mach-default/irq_vectors.h
+++ b/xen/include/asm-x86/mach-default/irq_vectors.h
@@ -8,7 +8,6 @@
#define EVENT_CHECK_VECTOR 0xfc
#define CALL_FUNCTION_VECTOR 0xfb
#define LOCAL_TIMER_VECTOR 0xfa
-#define PMU_APIC_VECTOR 0xf9
/*
* High-priority dynamically-allocated vectors. For interrupts that
* must be higher priority than any guest-bound interrupt.
diff --git a/xen/include/public/xenpmu.h b/xen/include/public/xenpmu.h
index 0060670..f05fdfa 100644
--- a/xen/include/public/xenpmu.h
+++ b/xen/include/public/xenpmu.h
@@ -28,6 +28,7 @@
#define XENPMU_init 4
#define XENPMU_finish 5
#define XENPMU_lvtpc_set 6
+#define XENPMU_flush 7 /* Write cached MSR values to HW */
/* ` } */
/* Parameters structure for HYPERVISOR_xenpmu_op call */
--
1.8.1.4
next prev parent reply other threads:[~2013-09-20 9:42 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-09-20 9:41 [PATCH v2 00/13] x86/PMU: Xen PMU PV support Boris Ostrovsky
2013-09-20 9:42 ` [PATCH v2 01/13] Export hypervisor symbols Boris Ostrovsky
2013-09-23 19:42 ` Konrad Rzeszutek Wilk
2013-09-23 20:06 ` Boris Ostrovsky
2013-09-24 17:40 ` Konrad Rzeszutek Wilk
2013-09-25 13:15 ` Jan Beulich
2013-09-25 14:03 ` Boris Ostrovsky
2013-09-25 14:53 ` Jan Beulich
2013-09-20 9:42 ` [PATCH v2 02/13] Set VCPU's is_running flag closer to when the VCPU is dispatched Boris Ostrovsky
2013-09-25 13:42 ` Jan Beulich
2013-09-25 14:08 ` Keir Fraser
2013-09-20 9:42 ` [PATCH v2 03/13] x86/PMU: Stop AMD counters when called from vpmu_save_force() Boris Ostrovsky
2013-09-20 9:42 ` [PATCH v2 04/13] x86/VPMU: Minor VPMU cleanup Boris Ostrovsky
2013-09-20 9:42 ` [PATCH v2 05/13] intel/VPMU: Clean up Intel VPMU code Boris Ostrovsky
2013-09-23 11:42 ` Dietmar Hahn
2013-09-23 19:46 ` Konrad Rzeszutek Wilk
2013-09-25 13:55 ` Jan Beulich
2013-09-25 14:39 ` Boris Ostrovsky
2013-09-25 14:57 ` Jan Beulich
2013-09-25 15:37 ` Boris Ostrovsky
2013-09-20 9:42 ` [PATCH v2 06/13] x86/PMU: Add public xenpmu.h Boris Ostrovsky
2013-09-23 13:04 ` Dietmar Hahn
2013-09-23 13:16 ` Jan Beulich
2013-09-23 14:00 ` Boris Ostrovsky
2013-09-23 13:45 ` Boris Ostrovsky
2013-09-25 14:04 ` Jan Beulich
2013-09-25 15:59 ` Boris Ostrovsky
2013-09-25 16:08 ` Jan Beulich
2013-09-30 13:25 ` Boris Ostrovsky
2013-09-30 13:30 ` Jan Beulich
2013-09-30 13:55 ` Boris Ostrovsky
2013-09-20 9:42 ` [PATCH v2 07/13] x86/PMU: Make vpmu not HVM-specific Boris Ostrovsky
2013-09-25 14:05 ` Jan Beulich
2013-09-25 14:49 ` Boris Ostrovsky
2013-09-25 14:57 ` Jan Beulich
2013-09-20 9:42 ` [PATCH v2 08/13] x86/PMU: Interface for setting PMU mode and flags Boris Ostrovsky
2013-09-25 14:11 ` Jan Beulich
2013-09-25 14:55 ` Boris Ostrovsky
2013-09-20 9:42 ` [PATCH v2 09/13] x86/PMU: Initialize PMU for PV guests Boris Ostrovsky
2013-09-20 9:42 ` [PATCH v2 10/13] x86/PMU: Add support for PMU registes handling on " Boris Ostrovsky
2013-09-23 13:50 ` Dietmar Hahn
2013-09-25 14:23 ` Jan Beulich
2013-09-25 15:03 ` Boris Ostrovsky
2013-09-20 9:42 ` Boris Ostrovsky [this message]
2013-09-25 14:33 ` [PATCH v2 11/13] x86/PMU: Handle PMU interrupts for " Jan Beulich
2013-09-25 14:40 ` Andrew Cooper
2013-09-25 15:52 ` Boris Ostrovsky
2013-09-25 15:19 ` Boris Ostrovsky
2013-09-25 15:25 ` Jan Beulich
2013-09-20 9:42 ` [PATCH v2 12/13] x86/PMU: Save VPMU state for PV guests during context switch Boris Ostrovsky
2013-09-20 9:42 ` [PATCH v2 13/13] x86/PMU: Move vpmu files up from hvm directory Boris Ostrovsky
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1379670132-1748-12-git-send-email-boris.ostrovsky@oracle.com \
--to=boris.ostrovsky@oracle.com \
--cc=George.Dunlap@eu.citrix.com \
--cc=JBeulich@suse.com \
--cc=dietmar.hahn@ts.fujitsu.com \
--cc=eddie.dong@intel.com \
--cc=jacob.shin@amd.com \
--cc=jun.nakajima@intel.com \
--cc=suravee.suthikulpanit@amd.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).