All of lore.kernel.org
 help / color / mirror / Atom feed
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: kevin.tian@intel.com, keir@xen.org, jbeulich@suse.com,
	jun.nakajima@intel.com, andrew.cooper3@citrix.com, tim@xen.org,
	dietmar.hahn@ts.fujitsu.com, xen-devel@lists.xen.org,
	Aravind.Gopalakrishnan@amd.com, suravee.suthikulpanit@amd.com
Subject: Re: [PATCH v11 for-xen-4.5 16/20] x86/VPMU: Handle PMU interrupts for PV guests
Date: Tue, 23 Sep 2014 13:18:19 -0400	[thread overview]
Message-ID: <20140923171819.GP3007@laptop.dumpdata.com> (raw)
In-Reply-To: <1411430281-6132-17-git-send-email-boris.ostrovsky@oracle.com>

On Mon, Sep 22, 2014 at 07:57:57PM -0400, Boris Ostrovsky wrote:
> Add support for handling PMU interrupts for PV guests.
> 
> VPMU for the interrupted VCPU is unloaded until the guest issues XENPMU_flush
> hypercall. This allows the guest to access PMU MSR values that are stored in
> VPMU context which is shared between hypervisor and domain, thus avoiding
> traps to hypervisor.
> 
> Since the the interrupt handler may now force VPMU context save (i.e. set

Extra 'the' ("Since the the interrupt handler").
> VPMU_CONTEXT_SAVE flag) we need to make changes to amd_vpmu_save() which
> until now expected this flag to be set only when the counters are stopped.

s/are/were/

> 
> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
> ---
>  xen/arch/x86/hvm/svm/vpmu.c |  11 +--
>  xen/arch/x86/hvm/vpmu.c     | 187 ++++++++++++++++++++++++++++++++++++++++----
>  xen/include/public/pmu.h    |   7 ++
>  3 files changed, 183 insertions(+), 22 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index 63c099c..055b21c 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -229,17 +229,12 @@ static int amd_vpmu_save(struct vcpu *v)
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>      unsigned int i;
>  
> -    /*
> -     * Stop the counters. If we came here via vpmu_save_force (i.e.
> -     * when VPMU_CONTEXT_SAVE is set) counters are already stopped.
> -     */
> +    for ( i = 0; i < num_counters; i++ )
> +        wrmsrl(ctrls[i], 0);
> +
>      if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_SAVE) )
>      {
>          vpmu_set(vpmu, VPMU_FROZEN);
> -
> -        for ( i = 0; i < num_counters; i++ )
> -            wrmsrl(ctrls[i], 0);
> -
>          return 0;
>      }
>  
> diff --git a/xen/arch/x86/hvm/vpmu.c b/xen/arch/x86/hvm/vpmu.c
> index edc5f91..018221d 100644
> --- a/xen/arch/x86/hvm/vpmu.c
> +++ b/xen/arch/x86/hvm/vpmu.c
> @@ -79,44 +79,191 @@ static void __init parse_vpmu_param(char *s)
>  
>  void vpmu_lvtpc_update(uint32_t val)
>  {
> -    struct vpmu_struct *vpmu = vcpu_vpmu(current);
> +    struct vcpu *curr = current;
> +    struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>  
>      vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
> -    apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +
> +    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
> +    if ( is_hvm_domain(curr->domain) ||
> +         !(vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) )
> +        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
>  }
>  
>  int vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content, uint64_t supported)
>  {
> -    struct vpmu_struct *vpmu = vcpu_vpmu(current);
> +    struct vcpu *curr = current;
> +    struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>  
>      if ( !(vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
>          return 0;
>  
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_wrmsr )
> -        return vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> +    {
> +        int ret = vpmu->arch_vpmu_ops->do_wrmsr(msr, msr_content, supported);
> +
> +        /*
> +         * We may have received a PMU interrupt during WRMSR handling
> +         * and since do_wrmsr may load VPMU context we should save
> +         * (and unload) it again.
> +         */
> +        if ( !is_hvm_domain(curr->domain) &&
> +             vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED) )
> +        {
> +            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> +            vpmu->arch_vpmu_ops->arch_vpmu_save(curr);
> +            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        }
> +        return ret;
> +    }
>      return 0;
>  }
>  
>  int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
>  {
> -    struct vpmu_struct *vpmu = vcpu_vpmu(current);
> +    struct vcpu *curr = current;
> +    struct vpmu_struct *vpmu = vcpu_vpmu(curr);
>  
>      if ( !(vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
>          return 0;
>  
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->do_rdmsr )
> -        return vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +    {

You have a nice comment in vpmu_do_wrmsr() above explaining why the VPMU
context is saved (and unloaded) again. Could you replicate it here, or at
least point the reader of the code to that reasoning?

> +        int ret = vpmu->arch_vpmu_ops->do_rdmsr(msr, msr_content);
> +
> +        if ( !is_hvm_domain(curr->domain) &&
> +             vpmu->xenpmu_data && (vpmu->xenpmu_data->pmu_flags & PMU_CACHED) )
> +        {
> +            vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> +            vpmu->arch_vpmu_ops->arch_vpmu_save(curr);
> +            vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        }
> +        return ret;
> +    }
>      return 0;
>  }
>  
> +static struct vcpu *choose_hwdom_vcpu(void)
> +{
> +    struct vcpu *v;
> +    unsigned idx = smp_processor_id() % hardware_domain->max_vcpus;
> +
> +    if ( hardware_domain->vcpu == NULL )
> +        return NULL;
> +
> +    v = hardware_domain->vcpu[idx];
> +
> +    /*
> +     * If index is not populated search downwards the vcpu array until
> +     * a valid vcpu can be found
> +     */
> +    while ( !v && idx-- )
> +        v = hardware_domain->vcpu[idx];
> +
> +    return v;
> +}
> +
>  int vpmu_do_interrupt(struct cpu_user_regs *regs)
>  {
> -    struct vcpu *v = current;
> -    struct vpmu_struct *vpmu = vcpu_vpmu(v);
> +    struct vcpu *sampled = current, *sampling;
> +    struct vpmu_struct *vpmu;
> +
> +    /* dom0 will handle interrupt for special domains (e.g. idle domain) */
> +    if ( sampled->domain->domain_id >= DOMID_FIRST_RESERVED )
> +    {
> +        sampling = choose_hwdom_vcpu();
> +        if ( !sampling )
> +            return 0;
> +    }
> +    else
> +        sampling = sampled;
> +
> +    vpmu = vcpu_vpmu(sampling);
> +    if ( !is_hvm_domain(sampling->domain) )
> +    {
> +        /* PV(H) guest */
> +        const struct cpu_user_regs *cur_regs;
> +
> +        if ( !vpmu->xenpmu_data )
> +            return 0;
> +
> +        if ( vpmu->xenpmu_data->pmu_flags & PMU_CACHED )
> +            return 1;
> +
> +        if ( is_pvh_domain(sampled->domain) &&
> +             !vpmu->arch_vpmu_ops->do_interrupt(regs) )
> +            return 0;
> +
> +        /* PV guest will be reading PMU MSRs from xenpmu_data */
> +        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling);
> +        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
> +
> +        /* Store appropriate registers in xenpmu_data */
> +        if ( is_pv_32bit_domain(sampling->domain) )
> +        {
> +            /*
> +             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
> +             * and therefore we treat it the same way as a non-privileged
> +             * PV 32-bit domain.
> +             */
> +            struct compat_pmu_regs *cmp;
> +
> +            cur_regs = guest_cpu_user_regs();
> +
> +            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
> +            cmp->eip = cur_regs->rip;
> +            cmp->esp = cur_regs->rsp;
> +            cmp->cs = cur_regs->cs;
> +            if ( (cmp->cs & 3) == 1 )
> +                cmp->cs &= ~3;
> +        }
> +        else
> +        {
> +            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;
> +
> +            /* Non-privileged domains are always in XENPMU_MODE_SELF mode */
> +            if ( (vpmu_mode & XENPMU_MODE_SELF) ||
> +                 (!is_hardware_domain(sampled->domain) &&
> +                  !is_idle_vcpu(sampled)) )
> +                cur_regs = guest_cpu_user_regs();
> +            else
> +                cur_regs = regs;
> +
> +            r->rip = cur_regs->rip;
> +            r->rsp = cur_regs->rsp;
> +
> +            if ( !is_pvh_domain(sampled->domain) )
> +            {
> +                r->cs = cur_regs->cs;
> +                if ( sampled->arch.flags & TF_kernel_mode )
> +                    r->cs &= ~3;
> +            }
> +            else
> +            {
> +                struct segment_register seg_cs;
> +
> +                hvm_get_segment_register(sampled, x86_seg_cs, &seg_cs);
> +                r->cs = seg_cs.sel;
> +            }
> +        }
> +
> +        vpmu->xenpmu_data->domain_id = DOMID_SELF;
> +        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
> +        vpmu->xenpmu_data->pcpu_id = smp_processor_id();
> +
> +        vpmu->xenpmu_data->pmu_flags |= PMU_CACHED;
> +        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
> +        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
> +
> +        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);
> +
> +        return 1;
> +    }
>  
>      if ( vpmu->arch_vpmu_ops )
>      {
> -        struct vlapic *vlapic = vcpu_vlapic(v);
> +        struct vlapic *vlapic = vcpu_vlapic(sampling);
>          u32 vlapic_lvtpc;
>          unsigned char int_vec;
>  
> @@ -130,9 +277,9 @@ int vpmu_do_interrupt(struct cpu_user_regs *regs)
>          int_vec = vlapic_lvtpc & APIC_VECTOR_MASK;
>  
>          if ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) == APIC_MODE_FIXED )
> -            vlapic_set_irq(vcpu_vlapic(v), int_vec, 0);
> +            vlapic_set_irq(vcpu_vlapic(sampling), int_vec, 0);
>          else
> -            v->nmi_pending = 1;
> +            sampling->nmi_pending = 1;
>          return 1;
>      }
>  
> @@ -231,7 +378,9 @@ void vpmu_load(struct vcpu *v)
>      local_irq_enable();
>  
>      /* Only when PMU is counting, we load PMU context immediately. */
> -    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
> +    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
> +         (!is_hvm_domain(v->domain) &&
> +          (vpmu->xenpmu_data->pmu_flags & PMU_CACHED)) )
>          return;
>  
>      if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
> @@ -456,6 +605,7 @@ static int vpmu_force_context_switch(void)
>  long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>  {
>      int ret = -EINVAL;
> +    struct vcpu *curr;
>      xen_pmu_params_t pmu_params;
>  
>      switch ( op )
> @@ -549,9 +699,18 @@ long do_xenpmu_op(int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
>          break;
>  
>      case XENPMU_lvtpc_set:
> -        if ( current->arch.vpmu.xenpmu_data == NULL )
> +        curr = current;
> +        if ( curr->arch.vpmu.xenpmu_data == NULL )
>              return -EINVAL;
> -        vpmu_lvtpc_update(current->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> +        vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> +        ret = 0;
> +        break;
> +
> +    case XENPMU_flush:
> +        curr = current;
> +        curr->arch.vpmu.xenpmu_data->pmu_flags &= ~PMU_CACHED;
> +        vpmu_lvtpc_update(curr->arch.vpmu.xenpmu_data->pmu.l.lapic_lvtpc);
> +        vpmu_load(curr);
>          ret = 0;
>          break;
>      }
> diff --git a/xen/include/public/pmu.h b/xen/include/public/pmu.h
> index 68a5fb8..a1886a5 100644
> --- a/xen/include/public/pmu.h
> +++ b/xen/include/public/pmu.h
> @@ -28,6 +28,7 @@
>  #define XENPMU_init            4
>  #define XENPMU_finish          5
>  #define XENPMU_lvtpc_set       6
> +#define XENPMU_flush           7 /* Write cached MSR values to HW     */
>  /* ` } */
>  
>  /* Parameters structure for HYPERVISOR_xenpmu_op call */
> @@ -61,6 +62,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_pmu_params_t);
>   */
>  #define XENPMU_FEATURE_INTEL_BTS  1
>  
> +/*
> + * PMU MSRs are cached in the context so the PV guest doesn't need to trap to
> + * the hypervisor
> + */
> +#define PMU_CACHED 1
> +
>  /* Shared between hypervisor and PV domain */
>  struct xen_pmu_data {
>      uint32_t domain_id;
> -- 
> 1.8.1.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

  reply	other threads:[~2014-09-23 17:18 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-22 23:57 [PATCH v11 for-xen-4.5 00/20] x86/PMU: Xen PMU PV(H) support Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 01/20] common/symbols: Export hypervisor symbols to privileged guest Boris Ostrovsky
2014-09-23 14:28   ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 02/20] x86/VPMU: Manage VPMU_CONTEXT_SAVE flag in vpmu_save_force() Boris Ostrovsky
2014-09-23 14:44   ` Konrad Rzeszutek Wilk
2014-09-23 15:06     ` Boris Ostrovsky
2014-09-23 15:26       ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 03/20] x86/VPMU: Set MSR bitmaps only for HVM/PVH guests Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 04/20] x86/VPMU: Make vpmu macros a bit more efficient Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 05/20] intel/VPMU: Clean up Intel VPMU code Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 06/20] vmx: Merge MSR management routines Boris Ostrovsky
2014-09-23 14:48   ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 07/20] x86/VPMU: Handle APIC_LVTPC accesses Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 08/20] intel/VPMU: MSR_CORE_PERF_GLOBAL_CTRL should be initialized to zero Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 09/20] x86/VPMU: Add public xenpmu.h Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 10/20] x86/VPMU: Make vpmu not HVM-specific Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 11/20] x86/VPMU: Interface for setting PMU mode and flags Boris Ostrovsky
2014-09-23 14:55   ` Konrad Rzeszutek Wilk
2014-09-23 18:58   ` Konrad Rzeszutek Wilk
2014-09-23 19:24     ` Boris Ostrovsky
2014-09-23 20:47       ` Daniel De Graaf
2014-09-23 21:06         ` Boris Ostrovsky
2014-09-24  8:31           ` Jan Beulich
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 12/20] x86/VPMU: Initialize PMU for PV(H) guests Boris Ostrovsky
2014-09-23 14:59   ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 13/20] x86/VPMU: Save VPMU state for PV guests during context switch Boris Ostrovsky
2014-09-23 15:01   ` Konrad Rzeszutek Wilk
2014-09-23 15:10     ` Boris Ostrovsky
2014-09-23 15:18       ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 14/20] x86/VPMU: When handling MSR accesses, leave fault injection to callers Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 15/20] x86/VPMU: Add support for PMU register handling on PV guests Boris Ostrovsky
2014-09-23 15:08   ` Konrad Rzeszutek Wilk
2014-09-23 15:49     ` Boris Ostrovsky
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 16/20] x86/VPMU: Handle PMU interrupts for " Boris Ostrovsky
2014-09-23 17:18   ` Konrad Rzeszutek Wilk [this message]
2014-09-23 17:36     ` Boris Ostrovsky
2014-09-23 18:31   ` Konrad Rzeszutek Wilk
2014-09-23 18:36     ` Konrad Rzeszutek Wilk
2014-09-23 18:57     ` Boris Ostrovsky
2014-09-23 19:07       ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 17/20] x86/VPMU: Merge vpmu_rdmsr and vpmu_wrmsr Boris Ostrovsky
2014-09-23 18:38   ` Konrad Rzeszutek Wilk
2014-09-22 23:57 ` [PATCH v11 for-xen-4.5 18/20] x86/VPMU: Add privileged PMU mode Boris Ostrovsky
2014-09-23 18:40   ` Konrad Rzeszutek Wilk
2014-09-22 23:58 ` [PATCH v11 for-xen-4.5 19/20] x86/VPMU: NMI-based VPMU support Boris Ostrovsky
2014-09-23 18:52   ` Konrad Rzeszutek Wilk
2014-09-23 19:18     ` Boris Ostrovsky
2014-09-23 19:27       ` Konrad Rzeszutek Wilk
2014-09-22 23:58 ` [PATCH v11 for-xen-4.5 20/20] x86/VPMU: Move VPMU files up from hvm/ directory Boris Ostrovsky
2014-09-23 18:53   ` Konrad Rzeszutek Wilk

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140923171819.GP3007@laptop.dumpdata.com \
    --to=konrad.wilk@oracle.com \
    --cc=Aravind.Gopalakrishnan@amd.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=dietmar.hahn@ts.fujitsu.com \
    --cc=jbeulich@suse.com \
    --cc=jun.nakajima@intel.com \
    --cc=keir@xen.org \
    --cc=kevin.tian@intel.com \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=tim@xen.org \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.