xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: kevin.tian@intel.com, keir@xen.org, jbeulich@suse.com,
	jun.nakajima@intel.com, andrew.cooper3@citrix.com, tim@xen.org,
	dietmar.hahn@ts.fujitsu.com, xen-devel@lists.xen.org,
	Aravind.Gopalakrishnan@amd.com, suravee.suthikulpanit@amd.com
Subject: Re: [PATCH v11 for-xen-4.5 16/20] x86/VPMU: Handle PMU interrupts for PV guests
Date: Tue, 23 Sep 2014 13:18:19 -0400	[thread overview]
Message-ID: <20140923171819.GP3007@laptop.dumpdata.com> (raw)
In-Reply-To: <1411430281-6132-17-git-send-email-boris.ostrovsky@oracle.com>

On Mon, Sep 22, 2014 at 07:57:57PM -0400, Boris Ostrovsky wrote:
> Add support for handling PMU interrupts for PV guests.
> 
> VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
> hypercall. This allows the guest to access PMU MSR values that are stored in
> VPMU context which is shared between hypervisor and domain, thus avoiding
> traps to hypervisor.
> 
> Since the the interrupt handler may now force VPMU context save (i.e. set

Extra 'the' ("the the").
> VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
> until now expected this flag to be set only when the counters are stopped.

s/are/were/

> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
>  xen/arch/x86/hvm/svm/vpmu.c |  11 +--
>  xen/arch/x86/hvm/vpmu.c     | 187 ++++++++++++++++++++++++++++++++++++++++----
>  xen/include/public/pmu.h    |   7 ++
>  3 files changed, 183 insertions(+), 22 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 63c099c..055b21c 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -229,17 +229,12 @@ static int amd_vpmu_save(struct vcpu *v)
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>      unsigned int i;
>  
> -    /*
> -     * Stop the counters. If we came here via vpmu_save_force (i.e.
> -     * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
> -     */
> +    for ( i = 0; i < num_counters; i++ )
> +        wrmsrl(ctrls[i], 0);
> +
>      if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
>      {
>          vpmu_set(vpmu, VPMU_FROZEN);
> -
> -        for ( i = 0; i < num_counters; i++ )
> -            wrmsrl(ctrls[i], 0);
> -
>          return 0;
>      }
>  
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index edc5f91..018221d 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -79,44 +79,191 @@ static void __init parse_vpmu_param(char *s)
>  
>  void vpmu_lvtpc_update(uint32_t val)
>  {
> -    struct vpmu_struct *vpmu = vcpu_vpmu(current);
> +    struct vcpu *curr = current;
> +    struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>  
>      vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
> -    apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +
> +    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
> +    if ( is_hvm_domain(curr->domain) ||
> +         !(vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) )
> +        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>  }
>  
>  int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
>  {
> -    struct vpmu_struct *vpmu = vcpu_vpmu(current);
> +    struct vcpu *curr = current;
> +    struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>  
>      if ( !(vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
>          return 0;
>  
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
> -        return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> +    {
> +        int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> +
> +        /*
> +         * We may have received a PMU interrupt during WRMSR handling
> +         * and since do_wrmsr may load VPMU context we should save
> +         * (and unload) it again.
> +         */
> +        if ( !is_hvm_domain(curr->domain) &&
> +             vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED) )
> +        {
> +            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> +            vpmu->arch_vpmu_ops->arch_vpmu_save(curr);
> +            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        }
> +        return ret;
> +    }
>      return 0;
>  }
>  
>  int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
>  {
> -    struct vpmu_struct *vpmu = vcpu_vpmu(current);
> +    struct vcpu *curr = current;
> +    struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>  
>      if ( !(vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
>          return 0;
>  
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
> -        return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +    {

You have a nice comment in vpmu_do_wrmsr() above explaining why the
VPMU context is saved (and unloaded) again. Could you replicate it here
in vpmu_do_rdmsr(), or at least point the reader of the code to that
reasoning?

> +        int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +
> +        if ( !is_hvm_domain(curr->domain) &&
> +             vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED) )
> +        {
> +            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> +            vpmu->arch_vpmu_ops->arch_vpmu_save(curr);
> +            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        }
> +        return ret;
> +    }
>      return 0;
>  }
>  
> +static struct vcpu *choose_hwdom_vcpu(void)
> +{
> +    struct vcpu *v;
> +    unsigned idx = smp_processor_id() % hardware_domain->max_vcpus;
> +
> +    if ( hardware_domain->vcpu == NULL )
> +        return NULL;
> +
> +    v = hardware_domain->vcpu[idx];
> +
> +    /*
> +     * If index is not populated search downwards the vcpu array until
> +     * a valid vcpu can be found
> +     */
> +    while ( !v && idx-- )
> +        v = hardware_domain->vcpu[idx];
> +
> +    return v;
> +}
> +
>  int vpmu_do_interrupt(struct cpu_user_regs *regs)
>  {
> -    struct vcpu *v = current;
> -    struct vpmu_struct *vpmu = vcpu_vpmu(v);
> +    struct vcpu *sampled = current, *sampling;
> +    struct vpmu_struct *vpmu;
> +
> +    /* dom0 will handle interrupt for special domains (e.g. idle domain) */
> +    if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED )
> +    {
> +        sampling = choose_hwdom_vcpu();
> +        if ( !sampling )
> +            return 0;
> +    }
> +    else
> +        sampling = sampled;
> +
> +    vpmu = vcpu_vpmu(sampling);
> +    if ( !is_hvm_domain(sampling->domain) )
> +    {
> +        /* PV(H) guest */
> +        const struct cpu_user_regs *cur_regs;
> +
> +        if ( !vpmu->xenpmu_data )
> +            return 0;
> +
> +        if ( vpmu->xenpmu_data->pmu_flags & PMU_CACHED )
> +            return 1;
> +
> +        if ( is_pvh_domain(sampled->domain) &&
> +             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
> +            return 0;
> +
> +        /* PV guest will be reading PMU MSRs from xenpmu_data */
> +        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling);
> +        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +
> +        /* Store appropriate registers in xenpmu_data */
> +        if ( is_pv_32bit_domain(sampling->domain) )
> +        {
> +            /*
> +             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
> +             * and therefore we treat it the same way as a non-privileged
> +             * PV 32-bit domain.
> +             */
> +            struct compat_pmu_regs *cmp;
> +
> +            cur_regs = guest_cpu_user_regs();
> +
> +            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
> +            cmp->eip = cur_regs->rip;
> +            cmp->esp = cur_regs->rsp;
> +            cmp->cs = cur_regs->cs;
> +            if ( (cmp->cs & 3) == 1 )
> +                cmp->cs &= ~3;
> +        }
> +        else
> +        {
> +            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
> +
> +            /* Non-privileged domains are always in XENPMU_MODE_SELF mode */
> +            if ( (vpmu_mode & XENPMU_MODE_SELF) ||
> +                 (!is_hardware_domain(sampled->domain) &&
> +                  !is_idle_vcpu(sampled)) )
> +                cur_regs = guest_cpu_user_regs();
> +            else
> +                cur_regs = regs;
> +
> +            r->rip = cur_regs->rip;
> +            r->rsp = cur_regs->rsp;
> +
> +            if ( !is_pvh_domain(sampled->domain) )
> +            {
> +                r->cs = cur_regs->cs;
> +                if ( sampled->arch.flags & TF_kernel_mode )
> +                    r->cs &= ~3;
> +            }
> +            else
> +            {
> +                struct segment_register seg_cs;
> +
> +                hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
> +                r->cs = seg_cs.sel;
> +            }
> +        }
> +
> +        vpmu->xenpmu_data->domain_id = DOMID_SELF;
> +        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
> +        vpmu->xenpmu_data->pcpu_id = smp_processor_id();
> +
> +        vpmu->xenpmu_data->pmu_flags |= PMU_CACHED;
> +        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
> +        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +
> +        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
> +
> +        return 1;
> +    }
>  
>      if ( vpmu->arch_vpmu_ops )
>      {
> -        struct vlapic *vlapic = vcpu_vlapic(v);
> +        struct vlapic *vlapic = vcpu_vlapic(sampling);
>          u32 vlapic_lvtpc;
>          unsigned char int_vec;
>  
> @@ -130,9 +277,9 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
>          int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
>  
>          if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
> -            vlapic_set_irq(vcpu_vlapic(v), int_vec, 0);
> +            vlapic_set_irq(vcpu_vlapic(sampling), int_vec, 0);
>          else
> -            v->nmi_pending = 1;
> +            sampling->nmi_pending = 1;
>          return 1;
>      }
>  
> @@ -231,7 +378,9 @@ void vpmu_load(struct vcpu *v)
>      local_irq_enable();
>  
>      /* Only when PMU is counting, we load PMU context immediately. */
> -    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
> +    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
> +         (!is_hvm_domain(v->domain) &&
> +          (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) )
>          return;
>  
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
> @@ -456,6 +605,7 @@ static int vpmu_force_context_switch(void)
>  long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>  {
>      int ret = -EINVAL;
> +    struct vcpu *curr;
>      xen_pmu_params_t pmu_params;
>  
>      switch ( op )
> @@ -549,9 +699,18 @@ long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>          break;
>  
>      case XENPMU_lvtpc_set:
> -        if ( current->arch.vpmu.xenpmu_data == NULL )
> +        curr = current;
> +        if ( curr->arch.vpmu.xenpmu_data == NULL )
>              return -EINVAL;
> -        vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> +        vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> +        ret = 0;
> +        break;
> +
> +    case XENPMU_flush:
> +        curr = current;
> +        curr->arch.vpmu.xenpmu_data->pmu_flags &= ~PMU_CACHED;
> +        vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> +        vpmu_load(curr);
>          ret = 0;
>          break;
>      }
> diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
> index 68a5fb8..a1886a5 100644
> --- a/xen/include/public/pmu.h
> +++ b/xen/include/public/pmu.h
> @@ -28,6 +28,7 @@
>  #define XENPMU_init            4
>  #define XENPMU_finish          5
>  #define XENPMU_lvtpc_set       6
> +#define XENPMU_flush           7 /* Write cached MSR values to HW     */
>  /* ` } */
>  
>  /* Parameters structure for HYPERVISOR_xenpmu_op call */
> @@ -61,6 +62,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t);
>   */
>  #define XENPMU_FEATURE_INTEL_BTS  1
>  
> +/*
> + * PMU MSRs are cached in the context so the PV guest doesn't need to trap to
> + * the hypervisor
> + */
> +#define PMU_CACHED 1
> +
>  /* Shared between hypervisor and PV domain */
>  struct xen_pmu_data {
>      uint32_t domain_id;
> -- 
> 1.8.1.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

  reply	other threads:[~2014-09-23 17:18 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-22 23:57 [PATCH v11 for-xen-4.5 00/20] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 01/20] common/symbols: Export hypervisor symbols to privileged guest Boris Ostrovsky
2014-09-23 14:28   ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 02/20] x86/VPMU: Manage VPMU_CONTEXT_SAVE flag in vpmu_save_force() Boris Ostrovsky
2014-09-23 14:44   ` Konrad Rzeszutek Wilk
2014-09-23 15:06     ` Boris Ostrovsky
2014-09-23 15:26       ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 03/20] x86/VPMU: Set MSR bitmaps only for HVM/PVH guests Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 04/20] x86/VPMU: Make vpmu macros a bit more efficient Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 05/20] intel/VPMU: Clean up Intel VPMU code Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 06/20] vmx: Merge MSR management routines Boris Ostrovsky
2014-09-23 14:48   ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 07/20] x86/VPMU: Handle APIC_LVTPC accesses Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 08/20] intel/VPMU: MSR_CORE_PERF_GLOBAL_CTRL should be initialized to zero Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 09/20] x86/VPMU: Add public xenpmu.h Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 10/20] x86/VPMU: Make vpmu not HVM-specific Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 11/20] x86/VPMU: Interface for setting PMU mode and flags Boris Ostrovsky
2014-09-23 14:55   ` Konrad Rzeszutek Wilk
2014-09-23 18:58   ` Konrad Rzeszutek Wilk
2014-09-23 19:24     ` Boris Ostrovsky
2014-09-23 20:47       ` Daniel De Graaf
2014-09-23 21:06         ` Boris Ostrovsky
2014-09-24  8:31           ` Jan Beulich
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 12/20] x86/VPMU: Initialize PMU for PV(H) guests Boris Ostrovsky
2014-09-23 14:59   ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 13/20] x86/VPMU: Save VPMU state for PV guests during context switch Boris Ostrovsky
2014-09-23 15:01   ` Konrad Rzeszutek Wilk
2014-09-23 15:10     ` Boris Ostrovsky
2014-09-23 15:18       ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 14/20] x86/VPMU: When handling MSR accesses, leave fault injection to callers Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 15/20] x86/VPMU: Add support for PMU register handling on PV guests Boris Ostrovsky
2014-09-23 15:08   ` Konrad Rzeszutek Wilk
2014-09-23 15:49     ` Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 16/20] x86/VPMU: Handle PMU interrupts for " Boris Ostrovsky
2014-09-23 17:18   ` Konrad Rzeszutek Wilk [this message]
2014-09-23 17:36     ` Boris Ostrovsky
2014-09-23 18:31   ` Konrad Rzeszutek Wilk
2014-09-23 18:36     ` Konrad Rzeszutek Wilk
2014-09-23 18:57     ` Boris Ostrovsky
2014-09-23 19:07       ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 17/20] x86/VPMU: Merge vpmu_rdmsr and vpmu_wrmsr Boris Ostrovsky
2014-09-23 18:38   ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 18/20] x86/VPMU: Add privileged PMU mode Boris Ostrovsky
2014-09-23 18:40   ` Konrad Rzeszutek Wilk
2014-09-22 23:58 ` [PATCH v11 for-xen-4.5 19/20] x86/VPMU: NMI-based VPMU support Boris Ostrovsky
2014-09-23 18:52   ` Konrad Rzeszutek Wilk
2014-09-23 19:18     ` Boris Ostrovsky
2014-09-23 19:27       ` Konrad Rzeszutek Wilk
2014-09-22 23:58 ` [PATCH v11 for-xen-4.5 20/20] x86/VPMU: Move VPMU files up from hvm/ directory Boris Ostrovsky
2014-09-23 18:53   ` Konrad Rzeszutek Wilk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140923171819.GP3007@laptop.dumpdata.com \
    --to=konrad.wilk@oracle.com \
    --cc=Aravind.Gopalakrishnan@amd.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=dietmar.hahn@ts.fujitsu.com \
    --cc=jbeulich@suse.com \
    --cc=jun.nakajima@intel.com \
    --cc=keir@xen.org \
    --cc=kevin.tian@intel.com \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=tim@xen.org \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).