All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Always save/restore performance counters when HVM guest switching VCPU
@ 2013-03-01 20:49 suravee.suthikulpanit
  2013-03-01 23:02 ` Boris Ostrovsky
  2013-03-04 12:42 ` George Dunlap
  0 siblings, 2 replies; 16+ messages in thread
From: suravee.suthikulpanit @ 2013-03-01 20:49 UTC (permalink / raw)
  To: xen-devel, JBeulich; +Cc: Suravee Suthikulpanit

From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

Currently, the performance counter registers are saved/restored
when the HVM guest switches VCPUs only if they are running.
However, PERF has one check where it writes the MSR and reads back
the value to check if the MSR is working.  This check has been shown
to fail if the VCPU is moved in between the rdmsr and wrmsr,
resulting in the two values being different.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 xen/arch/x86/hvm/svm/vpmu.c |   62 ++++++++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
index bf186fe..4854cf3 100644
--- a/xen/arch/x86/hvm/svm/vpmu.c
+++ b/xen/arch/x86/hvm/svm/vpmu.c
@@ -172,12 +172,16 @@ static inline void context_restore(struct vcpu *v)
     {
         wrmsrl(counters[i], ctxt->counters[i]);
 
+        if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
+            continue;
+
         /* Force an interrupt to allow guest reset the counter,
         if the value is positive */
         if ( is_overflowed(ctxt->counters[i]) && (ctxt->counters[i] > 0) )
         {
             gdprintk(XENLOG_WARNING, "VPMU: Force a performance counter "
-                "overflow interrupt!\n");
+                "overflow interrupt! (counter:%u value:0x%lx)\n",
+                i, ctxt->counters[i]);
             amd_vpmu_do_interrupt(0);
         }
     }
@@ -188,12 +192,13 @@ static void amd_vpmu_restore(struct vcpu *v)
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
     struct amd_vpmu_context *ctxt = vpmu->context;
 
-    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) &&
-           vpmu_is_set(vpmu, VPMU_RUNNING)) )
+    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) )
         return;
 
     context_restore(v);
-    apic_write(APIC_LVTPC, ctxt->hw_lapic_lvtpc);
+
+    if ( vpmu_is_set(vpmu, VPMU_RUNNING) ) 
+        apic_write(APIC_LVTPC, ctxt->hw_lapic_lvtpc);
 }
 
 static inline void context_save(struct vcpu *v)
@@ -214,13 +219,16 @@ static void amd_vpmu_save(struct vcpu *v)
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
     struct amd_vpmu_context *ctx = vpmu->context;
 
-    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) &&
-           vpmu_is_set(vpmu, VPMU_RUNNING)) )
+    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) )
         return;
 
     context_save(v);
-    ctx->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
-    apic_write(APIC_LVTPC,  ctx->hw_lapic_lvtpc | APIC_LVT_MASKED);
+
+    if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
+    {
+        ctx->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
+        apic_write(APIC_LVTPC,  ctx->hw_lapic_lvtpc | APIC_LVT_MASKED);
+    }
 }
 
 static void context_update(unsigned int msr, u64 msr_content)
@@ -303,25 +311,25 @@ static int amd_vpmu_initialise(struct vcpu *v)
 
     if ( counters == NULL )
     {
-         switch ( family )
-	 {
-	 case 0x15:
-	     num_counters = F15H_NUM_COUNTERS;
-	     counters = AMD_F15H_COUNTERS;
-	     ctrls = AMD_F15H_CTRLS;
-	     k7_counters_mirrored = 1;
-	     break;
-	 case 0x10:
-	 case 0x12:
-	 case 0x14:
-	 case 0x16:
-	 default:
-	     num_counters = F10H_NUM_COUNTERS;
-	     counters = AMD_F10H_COUNTERS;
-	     ctrls = AMD_F10H_CTRLS;
-	     k7_counters_mirrored = 0;
-	     break;
-	 }
+        switch ( family )
+        {
+        case 0x15:
+            num_counters = F15H_NUM_COUNTERS;
+            counters = AMD_F15H_COUNTERS;
+            ctrls = AMD_F15H_CTRLS;
+            k7_counters_mirrored = 1;
+            break;
+        case 0x10:
+        case 0x12:
+        case 0x14:
+        case 0x16:
+        default:
+            num_counters = F10H_NUM_COUNTERS;
+            counters = AMD_F10H_COUNTERS;
+            ctrls = AMD_F10H_CTRLS;
+            k7_counters_mirrored = 0;
+            break;
+        }
     }
 
     ctxt = xzalloc(struct amd_vpmu_context);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-01 20:49 suravee.suthikulpanit
@ 2013-03-01 23:02 ` Boris Ostrovsky
  2013-03-04 12:42 ` George Dunlap
  1 sibling, 0 replies; 16+ messages in thread
From: Boris Ostrovsky @ 2013-03-01 23:02 UTC (permalink / raw)
  To: suravee.suthikulpanit; +Cc: JBeulich, xen-devel

On 03/01/2013 03:49 PM, suravee.suthikulpanit@amd.com wrote:
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>
> Currently, the performance counter registers are saved/restores
> when the HVM guest switchs VCPUs only if they are running.
> However, PERF has one check where it writes the MSR and read back
> the value to check if the MSR is working.  This has shown to fails
> the check if the VCPU is moved in between rdmsr and wrmsr and
> resulting in the values are different.
>
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

Description may need to be cleaned up a bit but other than that

Acked-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>

> ---
>   xen/arch/x86/hvm/svm/vpmu.c |   62 ++++++++++++++++++++++++-------------------
>   1 file changed, 35 insertions(+), 27 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/svm/vpmu.c b/xen/arch/x86/hvm/svm/vpmu.c
> index bf186fe..4854cf3 100644
> --- a/xen/arch/x86/hvm/svm/vpmu.c
> +++ b/xen/arch/x86/hvm/svm/vpmu.c
> @@ -172,12 +172,16 @@ static inline void context_restore(struct vcpu *v)
>       {
>           wrmsrl(counters[i], ctxt->counters[i]);
>   
> +        if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
> +            continue;
> +
>           /* Force an interrupt to allow guest reset the counter,
>           if the value is positive */
>           if ( is_overflowed(ctxt->counters[i]) && (ctxt->counters[i] > 0) )
>           {
>               gdprintk(XENLOG_WARNING, "VPMU: Force a performance counter "
> -                "overflow interrupt!\n");
> +                "overflow interrupt! (counter:%u value:0x%lx)\n",
> +                i, ctxt->counters[i]);
>               amd_vpmu_do_interrupt(0);
>           }
>       }
> @@ -188,12 +192,13 @@ static void amd_vpmu_restore(struct vcpu *v)
>       struct vpmu_struct *vpmu = vcpu_vpmu(v);
>       struct amd_vpmu_context *ctxt = vpmu->context;
>   
> -    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) &&
> -           vpmu_is_set(vpmu, VPMU_RUNNING)) )
> +    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) )
>           return;
>   
>       context_restore(v);
> -    apic_write(APIC_LVTPC, ctxt->hw_lapic_lvtpc);
> +
> +    if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
> +        apic_write(APIC_LVTPC, ctxt->hw_lapic_lvtpc);
>   }
>   
>   static inline void context_save(struct vcpu *v)
> @@ -214,13 +219,16 @@ static void amd_vpmu_save(struct vcpu *v)
>       struct vpmu_struct *vpmu = vcpu_vpmu(v);
>       struct amd_vpmu_context *ctx = vpmu->context;
>   
> -    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) &&
> -           vpmu_is_set(vpmu, VPMU_RUNNING)) )
> +    if ( !(vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) )
>           return;
>   
>       context_save(v);
> -    ctx->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
> -    apic_write(APIC_LVTPC,  ctx->hw_lapic_lvtpc | APIC_LVT_MASKED);
> +
> +    if ( vpmu_is_set(vpmu, VPMU_RUNNING) )
> +    {
> +        ctx->hw_lapic_lvtpc = apic_read(APIC_LVTPC);
> +        apic_write(APIC_LVTPC,  ctx->hw_lapic_lvtpc | APIC_LVT_MASKED);
> +    }
>   }
>   
>   static void context_update(unsigned int msr, u64 msr_content)
> @@ -303,25 +311,25 @@ static int amd_vpmu_initialise(struct vcpu *v)
>   
>       if ( counters == NULL )
>       {
> -         switch ( family )
> -	 {
> -	 case 0x15:
> -	     num_counters = F15H_NUM_COUNTERS;
> -	     counters = AMD_F15H_COUNTERS;
> -	     ctrls = AMD_F15H_CTRLS;
> -	     k7_counters_mirrored = 1;
> -	     break;
> -	 case 0x10:
> -	 case 0x12:
> -	 case 0x14:
> -	 case 0x16:
> -	 default:
> -	     num_counters = F10H_NUM_COUNTERS;
> -	     counters = AMD_F10H_COUNTERS;
> -	     ctrls = AMD_F10H_CTRLS;
> -	     k7_counters_mirrored = 0;
> -	     break;
> -	 }
> +        switch ( family )
> +        {
> +        case 0x15:
> +            num_counters = F15H_NUM_COUNTERS;
> +            counters = AMD_F15H_COUNTERS;
> +            ctrls = AMD_F15H_CTRLS;
> +            k7_counters_mirrored = 1;
> +            break;
> +        case 0x10:
> +        case 0x12:
> +        case 0x14:
> +        case 0x16:
> +        default:
> +            num_counters = F10H_NUM_COUNTERS;
> +            counters = AMD_F10H_COUNTERS;
> +            ctrls = AMD_F10H_CTRLS;
> +            k7_counters_mirrored = 0;
> +            break;
> +        }
>       }
>   
>       ctxt = xzalloc(struct amd_vpmu_context);

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-01 20:49 suravee.suthikulpanit
  2013-03-01 23:02 ` Boris Ostrovsky
@ 2013-03-04 12:42 ` George Dunlap
  2013-03-08  8:47   ` Jan Beulich
  1 sibling, 1 reply; 16+ messages in thread
From: George Dunlap @ 2013-03-04 12:42 UTC (permalink / raw)
  To: suravee.suthikulpanit; +Cc: Jan Beulich, xen-devel@lists.xen.org

On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com> wrote:
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>
> Currently, the performance counter registers are saved/restores
> when the HVM guest switchs VCPUs only if they are running.
> However, PERF has one check where it writes the MSR and read back
> the value to check if the MSR is working.  This has shown to fails
> the check if the VCPU is moved in between rdmsr and wrmsr and
> resulting in the values are different.

Many moons ago (circa 2005) when I used performance counters, I found
that adding them to the save/restore path added a non-negligible
overhead -- something like 5% slow-down.  Do you have any reason to
believe this is no longer the case?  Have you done any benchmarks
before and after?

If there is a performance slow-down, you may have to implement
something like the "lazy FPU" save/restore, where you remove access to
the VPMU MSRs to detect that the guest is accessing them.

 -George

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-04 12:42 ` George Dunlap
@ 2013-03-08  8:47   ` Jan Beulich
  2013-03-08 22:52     ` Suravee Suthikulanit
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2013-03-08  8:47 UTC (permalink / raw)
  To: suravee.suthikulpanit; +Cc: George Dunlap, xen-devel@lists.xen.org

>>> On 04.03.13 at 13:42, George Dunlap <George.Dunlap@eu.citrix.com> wrote:
> On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com> wrote:
>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>
>> Currently, the performance counter registers are saved/restores
>> when the HVM guest switchs VCPUs only if they are running.
>> However, PERF has one check where it writes the MSR and read back
>> the value to check if the MSR is working.  This has shown to fails
>> the check if the VCPU is moved in between rdmsr and wrmsr and
>> resulting in the values are different.
> 
> Many moons ago (circa 2005) when I used performance counters, I found
> that adding them to the save/restore path added a non-neligible
> overhead -- something like 5% slow-down.  Do you have any reason to
> believe this is no longer the case?  Have you done any benchmarks
> before and after?
> 
> If there is a performance slow-down, you may have to implement
> something like the "lazy FPU" save/restore, where you remove access to
> the VPMU MSRs to detect that the guest is accessing them.

Suravee,

without addressing George's concerns, I don't think you can
expect the patch to be committed (all the more so since Boris,
along with his ack, also asked for the description to be adjusted).

Jan

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
@ 2013-03-08 14:50 Boris Ostrovsky
  2013-03-08 14:56 ` George Dunlap
  0 siblings, 1 reply; 16+ messages in thread
From: Boris Ostrovsky @ 2013-03-08 14:50 UTC (permalink / raw)
  To: JBeulich; +Cc: George.Dunlap, suravee.suthikulpanit, xen-devel


----- JBeulich@suse.com wrote:

> >>> On 04.03.13 at 13:42, George Dunlap <George.Dunlap@eu.citrix.com>
> wrote:
> > On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com>
> wrote:
> >> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> >>
> >> Currently, the performance counter registers are saved/restores
> >> when the HVM guest switchs VCPUs only if they are running.
> >> However, PERF has one check where it writes the MSR and read back
> >> the value to check if the MSR is working.  This has shown to fails
> >> the check if the VCPU is moved in between rdmsr and wrmsr and
> >> resulting in the values are different.
> > 
> > Many moons ago (circa 2005) when I used performance counters, I
> found
> > that adding them to the save/restore path added a non-neligible
> > overhead -- something like 5% slow-down.  Do you have any reason to
> > believe this is no longer the case?  Have you done any benchmarks
> > before and after?

I was doing some VPMU tracing a couple of weeks ago and by looking at
trace timestamps I think I saw about 4000 cycles on VPMU save and
~9000 cycles on restore. I don't remember what percentage of a whole
context switch that was.

This was on Intel.

-boris


> > 
> > If there is a performance slow-down, you may have to implement
> > something like the "lazy FPU" save/restore, where you remove access
> to
> > the VPMU MSRs to detect that the guest is accessing them.
> 
> Suravee,
> 
> without addressing George's concerns, I don't think you can
> expect the patch to be committed (the more that Boris, along
> with his ack, also asked to adjust the description).
> 
> Jan
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-08 14:50 Boris Ostrovsky
@ 2013-03-08 14:56 ` George Dunlap
  2013-03-08 15:15   ` Jan Beulich
  0 siblings, 1 reply; 16+ messages in thread
From: George Dunlap @ 2013-03-08 14:56 UTC (permalink / raw)
  To: Boris Ostrovsky
  Cc: suravee.suthikulpanit@amd.com, JBeulich@suse.com,
	xen-devel@lists.xen.org

On 08/03/13 14:50, Boris Ostrovsky wrote:
> ----- JBeulich@suse.com wrote:
>
>>>>> On 04.03.13 at 13:42, George Dunlap <George.Dunlap@eu.citrix.com>
>> wrote:
>>> On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com>
>> wrote:
>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>>>
>>>> Currently, the performance counter registers are saved/restores
>>>> when the HVM guest switchs VCPUs only if they are running.
>>>> However, PERF has one check where it writes the MSR and read back
>>>> the value to check if the MSR is working.  This has shown to fails
>>>> the check if the VCPU is moved in between rdmsr and wrmsr and
>>>> resulting in the values are different.
>>> Many moons ago (circa 2005) when I used performance counters, I
>> found
>>> that adding them to the save/restore path added a non-neligible
>>> overhead -- something like 5% slow-down.  Do you have any reason to
>>> believe this is no longer the case?  Have you done any benchmarks
>>> before and after?
> I was doing some VPMU tracing a couple of weeks ago and by looking at
> trace timestamps I think I saw about 4000 cycles on VPMU save and
> ~9000 cycles on restore. Don't remember what it was percentage-wise of
> a whole context switch.
>
> This was on Intel.

That's a really hefty expense to make all users pay on every context 
switch, on behalf of a random check in a piece of software that only a 
handful of people are going to be actually using.

I'm having a hard time telling what PERF is being talked about here -- 
couldn't this check be fixed on their side, by perhaps checking the 
CPUID leaf for the existence of Xen?

If not I think a "lazy vpmu activation" is going to be the only option.

  -George

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
@ 2013-03-08 15:11 Boris Ostrovsky
  2013-03-11 11:11 ` George Dunlap
  0 siblings, 1 reply; 16+ messages in thread
From: Boris Ostrovsky @ 2013-03-08 15:11 UTC (permalink / raw)
  To: george.dunlap; +Cc: JBeulich, suravee.suthikulpanit, xen-devel


----- george.dunlap@eu.citrix.com wrote:

> On 08/03/13 14:50, Boris Ostrovsky wrote:
> > ----- JBeulich@suse.com wrote:
> >
> >>>>> On 04.03.13 at 13:42, George Dunlap
> <George.Dunlap@eu.citrix.com>
> >> wrote:
> >>> On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com>
> >> wrote:
> >>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> >>>>
> >>>> Currently, the performance counter registers are saved/restores
> >>>> when the HVM guest switchs VCPUs only if they are running.
> >>>> However, PERF has one check where it writes the MSR and read
> back
> >>>> the value to check if the MSR is working.  This has shown to
> fails
> >>>> the check if the VCPU is moved in between rdmsr and wrmsr and
> >>>> resulting in the values are different.
> >>> Many moons ago (circa 2005) when I used performance counters, I
> >> found
> >>> that adding them to the save/restore path added a non-neligible
> >>> overhead -- something like 5% slow-down.  Do you have any reason
> to
> >>> believe this is no longer the case?  Have you done any benchmarks
> >>> before and after?
> > I was doing some VPMU tracing a couple of weeks ago and by looking
> at
> > trace timestamps I think I saw about 4000 cycles on VPMU save and
> > ~9000 cycles on restore. Don't remember what it was percentage-wise
> of
> > a whole context switch.
> >
> > This was on Intel.
> 
> That's a really hefty expense to make all users pay on every context 
> switch, on behalf of a random check in a piece of software that only a
> handful of people are going to be actually using.

I believe Linux uses perf infrastructure to implement the watchdog.

> 
> I'm having a hard time telling what PERF is being talked about here --
> couldn't this check be fixed on their side, by perhaps checking the 
> CPUID leaf for the existence of Xen?

If by "here" you refer to the problem that Suravee's patch is trying to
address then I suspect it's this:
  http://lxr.linux.no/#linux+v3.8.2/arch/x86/kernel/cpu/perf_event.c#L210

> 
> If not I think a "lazy vpmu activation" is going to be the only
> option.

Yes, I actually was going to look at that.

-boris

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-08 14:56 ` George Dunlap
@ 2013-03-08 15:15   ` Jan Beulich
  0 siblings, 0 replies; 16+ messages in thread
From: Jan Beulich @ 2013-03-08 15:15 UTC (permalink / raw)
  To: George Dunlap, Boris Ostrovsky
  Cc: suravee.suthikulpanit@amd.com, xen-devel@lists.xen.org

>>> On 08.03.13 at 15:56, George Dunlap <george.dunlap@eu.citrix.com> wrote:
> On 08/03/13 14:50, Boris Ostrovsky wrote:
>> ----- JBeulich@suse.com wrote:
>>
>>>>>> On 04.03.13 at 13:42, George Dunlap <George.Dunlap@eu.citrix.com>
>>> wrote:
>>>> On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com>
>>> wrote:
>>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>>>>
>>>>> Currently, the performance counter registers are saved/restores
>>>>> when the HVM guest switchs VCPUs only if they are running.
>>>>> However, PERF has one check where it writes the MSR and read back
>>>>> the value to check if the MSR is working.  This has shown to fails
>>>>> the check if the VCPU is moved in between rdmsr and wrmsr and
>>>>> resulting in the values are different.
>>>> Many moons ago (circa 2005) when I used performance counters, I
>>> found
>>>> that adding them to the save/restore path added a non-neligible
>>>> overhead -- something like 5% slow-down.  Do you have any reason to
>>>> believe this is no longer the case?  Have you done any benchmarks
>>>> before and after?
>> I was doing some VPMU tracing a couple of weeks ago and by looking at
>> trace timestamps I think I saw about 4000 cycles on VPMU save and
>> ~9000 cycles on restore. Don't remember what it was percentage-wise of
>> a whole context switch.
>>
>> This was on Intel.
> 
> That's a really hefty expense to make all users pay on every context 
> switch, on behalf of a random check in a piece of software that only a 
> handful of people are going to be actually using.
> 
> I'm having a hard time telling what PERF is being talked about here -- 
> couldn't this check be fixed on their side, by perhaps checking the 
> CPUID leaf for the existence of Xen?
> 
> If not I think a "lazy vpmu activation" is going to be the only option.

Fully agree.

Jan

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-08  8:47   ` Jan Beulich
@ 2013-03-08 22:52     ` Suravee Suthikulanit
  0 siblings, 0 replies; 16+ messages in thread
From: Suravee Suthikulanit @ 2013-03-08 22:52 UTC (permalink / raw)
  To: Jan Beulich; +Cc: George Dunlap, xen-devel@lists.xen.org

On 3/8/2013 2:47 AM, Jan Beulich wrote:
>>>> On 04.03.13 at 13:42, George Dunlap <George.Dunlap@eu.citrix.com> wrote:
>> On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com> wrote:
>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>>
>>> Currently, the performance counter registers are saved/restores
>>> when the HVM guest switchs VCPUs only if they are running.
>>> However, PERF has one check where it writes the MSR and read back
>>> the value to check if the MSR is working.  This has shown to fails
>>> the check if the VCPU is moved in between rdmsr and wrmsr and
>>> resulting in the values are different.
>> Many moons ago (circa 2005) when I used performance counters, I found
>> that adding them to the save/restore path added a non-neligible
>> overhead -- something like 5% slow-down.  Do you have any reason to
>> believe this is no longer the case?  Have you done any benchmarks
>> before and after?
>>
>> If there is a performance slow-down, you may have to implement
>> something like the "lazy FPU" save/restore, where you remove access to
>> the VPMU MSRs to detect that the guest is accessing them.
> Suravee,
>
> without addressing George's concerns, I don't think you can
> expect the patch to be committed (the more that Boris, along
> with his ack, also asked to adjust the description).
>
> Jan
>
>
I understand that we don't want to introduce this overhead.  Let me:
1. Measure the overhead in this case.
2. Look into the alternative approach (lazy save/restore)

and get back to you all.

Suravee

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-08 15:11 [PATCH] Always save/restore performance counters when HVM guest switching VCPU Boris Ostrovsky
@ 2013-03-11 11:11 ` George Dunlap
  2013-03-11 14:53   ` Konrad Rzeszutek Wilk
  0 siblings, 1 reply; 16+ messages in thread
From: George Dunlap @ 2013-03-11 11:11 UTC (permalink / raw)
  To: Boris Ostrovsky
  Cc: Konrad Rzeszutek Wilk, JBeulich@suse.com,
	suravee.suthikulpanit@amd.com, xen-devel@lists.xen.org

On 08/03/13 15:11, Boris Ostrovsky wrote:
> ----- george.dunlap@eu.citrix.com wrote:
>
>> On 08/03/13 14:50, Boris Ostrovsky wrote:
>>> ----- JBeulich@suse.com wrote:
>>>
>>>>>>> On 04.03.13 at 13:42, George Dunlap
>> <George.Dunlap@eu.citrix.com>
>>>> wrote:
>>>>> On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com>
>>>> wrote:
>>>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>>>>>
>>>>>> Currently, the performance counter registers are saved/restores
>>>>>> when the HVM guest switchs VCPUs only if they are running.
>>>>>> However, PERF has one check where it writes the MSR and read
>> back
>>>>>> the value to check if the MSR is working.  This has shown to
>> fails
>>>>>> the check if the VCPU is moved in between rdmsr and wrmsr and
>>>>>> resulting in the values are different.
>>>>> Many moons ago (circa 2005) when I used performance counters, I
>>>> found
>>>>> that adding them to the save/restore path added a non-neligible
>>>>> overhead -- something like 5% slow-down.  Do you have any reason
>> to
>>>>> believe this is no longer the case?  Have you done any benchmarks
>>>>> before and after?
>>> I was doing some VPMU tracing a couple of weeks ago and by looking
>> at
>>> trace timestamps I think I saw about 4000 cycles on VPMU save and
>>> ~9000 cycles on restore. Don't remember what it was percentage-wise
>> of
>>> a whole context switch.
>>>
>>> This was on Intel.
>> That's a really hefty expense to make all users pay on every context
>> switch, on behalf of a random check in a piece of software that only a
>> handful of people are going to be actually using.
> I believe Linux uses perf infrastructure to implement the watchdog.

Hmm -- well if it is the case that adding performance counters to the 
vcpu context switch path will add a measurable overhead, then we 
probably don't want them enabled for typical guests anyway.  If people 
are actually using the performance counters to measure performance, that 
makes sense; but for watchdogs it seems like Xen should be able to 
provide something that is useful for a watchdog without the extra 
overhead of saving and restoring performance counters.

Konrad, any thoughts?

  -George

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-11 11:11 ` George Dunlap
@ 2013-03-11 14:53   ` Konrad Rzeszutek Wilk
  2013-03-11 14:59     ` George Dunlap
                       ` (2 more replies)
  0 siblings, 3 replies; 16+ messages in thread
From: Konrad Rzeszutek Wilk @ 2013-03-11 14:53 UTC (permalink / raw)
  To: George Dunlap
  Cc: Boris Ostrovsky, JBeulich@suse.com, suravee.suthikulpanit@amd.com,
	xen-devel@lists.xen.org

On Mon, Mar 11, 2013 at 11:11:02AM +0000, George Dunlap wrote:
> On 08/03/13 15:11, Boris Ostrovsky wrote:
> >----- george.dunlap@eu.citrix.com wrote:
> >
> >>On 08/03/13 14:50, Boris Ostrovsky wrote:
> >>>----- JBeulich@suse.com wrote:
> >>>
> >>>>>>>On 04.03.13 at 13:42, George Dunlap
> >><George.Dunlap@eu.citrix.com>
> >>>>wrote:
> >>>>>On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com>
> >>>>wrote:
> >>>>>>From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> >>>>>>
> >>>>>>Currently, the performance counter registers are saved/restores
> >>>>>>when the HVM guest switchs VCPUs only if they are running.
> >>>>>>However, PERF has one check where it writes the MSR and read
> >>back
> >>>>>>the value to check if the MSR is working.  This has shown to
> >>fails
> >>>>>>the check if the VCPU is moved in between rdmsr and wrmsr and
> >>>>>>resulting in the values are different.
> >>>>>Many moons ago (circa 2005) when I used performance counters, I
> >>>>found
> >>>>>that adding them to the save/restore path added a non-neligible
> >>>>>overhead -- something like 5% slow-down.  Do you have any reason
> >>to
> >>>>>believe this is no longer the case?  Have you done any benchmarks
> >>>>>before and after?
> >>>I was doing some VPMU tracing a couple of weeks ago and by looking
> >>at
> >>>trace timestamps I think I saw about 4000 cycles on VPMU save and
> >>>~9000 cycles on restore. Don't remember what it was percentage-wise
> >>of
> >>>a whole context switch.
> >>>
> >>>This was on Intel.
> >>That's a really hefty expense to make all users pay on every context
> >>switch, on behalf of a random check in a piece of software that only a
> >>handful of people are going to be actually using.
> >I believe Linux uses perf infrastructure to implement the watchdog.

And by default it won't work, because on Intel you need these CPUID flags:

cpuid=['0xa:eax=0x07300403,ebx=0x00000004,ecx=0x00000000,edx=0x00000603' ]

What we get right now when booting PVHVM under Intel is:

[    0.160989] Performance Events: unsupported p6 CPU model 45 no PMU driver, software events only.
[    0.168098] NMI watchdog disabled (cpu0): hardware events not enabled

That is, unless the CPUID setting shown above is provided.
> 
> Hmm -- well if it is the case that adding performance counters to
> the vcpu context switch path will add a measurable overhead, then we
> probably don't want them enabled for typical guests anyway.  If
> people are actually using the performance counters to measure
> performance, that makes sense; but for watchdogs it seems like Xen
> should be able to provide something that is useful for a watchdog
> without the extra overhead of saving and restoring performance
> counters.
> 
> Konrad, any thoughts?

The other thing is that there is a Xen watchdog -- the one that Jan Beulich
wrote, which should also work under PVHVM:

drivers/watchdog/xen_wdt.c


> 
>  -George

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-11 14:53   ` Konrad Rzeszutek Wilk
@ 2013-03-11 14:59     ` George Dunlap
  2013-03-11 15:54       ` Boris Ostrovsky
  2013-03-11 16:03     ` Jan Beulich
  2013-03-12  8:18     ` Dietmar Hahn
  2 siblings, 1 reply; 16+ messages in thread
From: George Dunlap @ 2013-03-11 14:59 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: Boris Ostrovsky, JBeulich@suse.com, suravee.suthikulpanit@amd.com,
	xen-devel@lists.xen.org

On 11/03/13 14:53, Konrad Rzeszutek Wilk wrote:
> On Mon, Mar 11, 2013 at 11:11:02AM +0000, George Dunlap wrote:
>> On 08/03/13 15:11, Boris Ostrovsky wrote:
>>> ----- george.dunlap@eu.citrix.com wrote:
>>>
>>>> On 08/03/13 14:50, Boris Ostrovsky wrote:
>>>>> ----- JBeulich@suse.com wrote:
>>>>>
>>>>>>>>> On 04.03.13 at 13:42, George Dunlap
>>>> <George.Dunlap@eu.citrix.com>
>>>>>> wrote:
>>>>>>> On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com>
>>>>>> wrote:
>>>>>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>>>>>>>
>>>>>>>> Currently, the performance counter registers are saved/restores
>>>>>>>> when the HVM guest switchs VCPUs only if they are running.
>>>>>>>> However, PERF has one check where it writes the MSR and read
>>>> back
>>>>>>>> the value to check if the MSR is working.  This has shown to
>>>> fails
>>>>>>>> the check if the VCPU is moved in between rdmsr and wrmsr and
>>>>>>>> resulting in the values are different.
>>>>>>> Many moons ago (circa 2005) when I used performance counters, I
>>>>>> found
>>>>>>> that adding them to the save/restore path added a non-neligible
>>>>>>> overhead -- something like 5% slow-down.  Do you have any reason
>>>> to
>>>>>>> believe this is no longer the case?  Have you done any benchmarks
>>>>>>> before and after?
>>>>> I was doing some VPMU tracing a couple of weeks ago and by looking
>>>> at
>>>>> trace timestamps I think I saw about 4000 cycles on VPMU save and
>>>>> ~9000 cycles on restore. Don't remember what it was percentage-wise
>>>> of
>>>>> a whole context switch.
>>>>>
>>>>> This was on Intel.
>>>> That's a really hefty expense to make all users pay on every context
>>>> switch, on behalf of a random check in a piece of software that only a
>>>> handful of people are going to be actually using.
>>> I believe Linux uses perf infrastructure to implement the watchdog.
> And by default it won't work as for Intel you need these flags:
>
> cpuid=['0xa:eax=0x07300403,ebx=0x00000004,ecx=0x00000000,edx=0x00000603' ]
>
> What we get right now when booting PVHVM under Intel is:
>
> [    0.160989] Performance Events: unsupported p6 CPU model 45 no PMU driver, software events only.
> [    0.168098] NMI watchdog disabled (cpu0): hardware events not enabled
>
> Unless said above CPUID flag is provided.
>> Hmm -- well if it is the case that adding performance counters to
>> the vcpu context switch path will add a measurable overhead, then we
>> probably don't want them enabled for typical guests anyway.  If
>> people are actually using the performance counters to measure
>> performance, that makes sense; but for watchdogs it seems like Xen
>> should be able to provide something that is useful for a watchdog
>> without the extra overhead of saving and restoring performance
>> counters.
>>
>> Konrad, any thoughts?
> The other thing is that there is an Xen watchdog. The one that Jan Beulich
> wrote which should also work under PVHVM:
>
> drivers/watchdog/xen_wdt.c

But my main question is: If the Linux perf system successfully detects a 
vpmu, will it use the Xen watchdog, or will it try to use the vpmu?  Do 
we need to do anything to make sure that when running under Xen, Linux 
will *not* try to use the vpmu for the watchdog?

  -George

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-11 14:59     ` George Dunlap
@ 2013-03-11 15:54       ` Boris Ostrovsky
  0 siblings, 0 replies; 16+ messages in thread
From: Boris Ostrovsky @ 2013-03-11 15:54 UTC (permalink / raw)
  To: George Dunlap
  Cc: xen-devel@lists.xen.org, suravee.suthikulpanit@amd.com,
	JBeulich@suse.com, Konrad Rzeszutek Wilk

On 03/11/2013 10:59 AM, George Dunlap wrote:
> On 11/03/13 14:53, Konrad Rzeszutek Wilk wrote:
>> On Mon, Mar 11, 2013 at 11:11:02AM +0000, George Dunlap wrote:
>>> On 08/03/13 15:11, Boris Ostrovsky wrote:
>>>> ----- george.dunlap@eu.citrix.com wrote:
>>>>
>>>>> On 08/03/13 14:50, Boris Ostrovsky wrote:
>>>>>> ----- JBeulich@suse.com wrote:
>>>>>>
>>>>>>>>>> On 04.03.13 at 13:42, George Dunlap
>>>>> <George.Dunlap@eu.citrix.com>
>>>>>>> wrote:
>>>>>>>> On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com>
>>>>>>> wrote:
>>>>>>>>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>>>>>>>>
>>>>>>>>> Currently, the performance counter registers are saved/restores
>>>>>>>>> when the HVM guest switchs VCPUs only if they are running.
>>>>>>>>> However, PERF has one check where it writes the MSR and read
>>>>> back
>>>>>>>>> the value to check if the MSR is working.  This has shown to
>>>>> fails
>>>>>>>>> the check if the VCPU is moved in between rdmsr and wrmsr and
>>>>>>>>> resulting in the values are different.
>>>>>>>> Many moons ago (circa 2005) when I used performance counters, I
>>>>>>> found
>>>>>>>> that adding them to the save/restore path added a non-neligible
>>>>>>>> overhead -- something like 5% slow-down.  Do you have any reason
>>>>> to
>>>>>>>> believe this is no longer the case?  Have you done any benchmarks
>>>>>>>> before and after?
>>>>>> I was doing some VPMU tracing a couple of weeks ago and by looking
>>>>> at
>>>>>> trace timestamps I think I saw about 4000 cycles on VPMU save and
>>>>>> ~9000 cycles on restore. Don't remember what it was percentage-wise
>>>>> of
>>>>>> a whole context switch.
>>>>>>
>>>>>> This was on Intel.
>>>>> That's a really hefty expense to make all users pay on every context
>>>>> switch, on behalf of a random check in a piece of software that 
>>>>> only a
>>>>> handful of people are going to be actually using.
>>>> I believe Linux uses perf infrastructure to implement the watchdog.
>> And by default it won't work as for Intel you need these flags:
>>
>> cpuid=['0xa:eax=0x07300403,ebx=0x00000004,ecx=0x00000000,edx=0x00000603' 
>> ]
>>
>> What we get right now when booting PVHVM under Intel is:
>>
>> [    0.160989] Performance Events: unsupported p6 CPU model 45 no PMU 
>> driver, software events only.
>> [    0.168098] NMI watchdog disabled (cpu0): hardware events not enabled
>>
>> Unless said above CPUID flag is provided.
>>> Hmm -- well if it is the case that adding performance counters to
>>> the vcpu context switch path will add a measurable overhead, then we
>>> probably don't want them enabled for typical guests anyway. If
>>> people are actually using the performance counters to measure
>>> performance, that makes sense; but for watchdogs it seems like Xen
>>> should be able to provide something that is useful for a watchdog
>>> without the extra overhead of saving and restoring performance
>>> counters.
>>>
>>> Konrad, any thoughts?
>> The other thing is that there is an Xen watchdog. The one that Jan 
>> Beulich
>> wrote which should also work under PVHVM:
>>
>> drivers/watchdog/xen_wdt.c
>
> But my main question is: If the Linux perf system successfully detects 
> a vpmu, will it use the Xen watchdog, or will it try to use the vpmu?  
> Do we need to do anything to make sure that when running under Xen, 
> Linux will *not* try to use the vpmu for the watchdog?


It looks to me as if both watchdogs are running. Perf's counter 0 (which
is what the watchdog uses) is definitely enabled.

-boris

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-11 14:53   ` Konrad Rzeszutek Wilk
  2013-03-11 14:59     ` George Dunlap
@ 2013-03-11 16:03     ` Jan Beulich
  2013-03-12  8:18     ` Dietmar Hahn
  2 siblings, 0 replies; 16+ messages in thread
From: Jan Beulich @ 2013-03-11 16:03 UTC (permalink / raw)
  To: George Dunlap, Konrad Rzeszutek Wilk
  Cc: Boris Ostrovsky, suravee.suthikulpanit@amd.com,
	xen-devel@lists.xen.org

>>> On 11.03.13 at 15:53, Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> wrote:
> On Mon, Mar 11, 2013 at 11:11:02AM +0000, George Dunlap wrote:
>> Konrad, any thoughts?
> 
> The other thing is that there is an Xen watchdog. The one that Jan Beulich
> wrote which should also work under PVHVM:
> 
> drivers/watchdog/xen_wdt.c

But that's a different kind of watchdog. The perf one is to check that
CPUs are alive, whereas this driver is a system-wide (VM-wide) one.

Jan

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-11 14:53   ` Konrad Rzeszutek Wilk
  2013-03-11 14:59     ` George Dunlap
  2013-03-11 16:03     ` Jan Beulich
@ 2013-03-12  8:18     ` Dietmar Hahn
  2013-03-12 15:12       ` Konrad Rzeszutek Wilk
  2 siblings, 1 reply; 16+ messages in thread
From: Dietmar Hahn @ 2013-03-12  8:18 UTC (permalink / raw)
  To: xen-devel
  Cc: George Dunlap, Boris Ostrovsky, suravee.suthikulpanit@amd.com,
	JBeulich@suse.com, Konrad Rzeszutek Wilk

Am Montag 11 März 2013, 10:53:49 schrieb Konrad Rzeszutek Wilk:
> On Mon, Mar 11, 2013 at 11:11:02AM +0000, George Dunlap wrote:
> > On 08/03/13 15:11, Boris Ostrovsky wrote:
> > >----- george.dunlap@eu.citrix.com wrote:
> > >
> > >>On 08/03/13 14:50, Boris Ostrovsky wrote:
> > >>>----- JBeulich@suse.com wrote:
> > >>>
> > >>>>>>>On 04.03.13 at 13:42, George Dunlap
> > >><George.Dunlap@eu.citrix.com>
> > >>>>wrote:
> > >>>>>On Fri, Mar 1, 2013 at 8:49 PM,  <suravee.suthikulpanit@amd.com>
> > >>>>wrote:
> > >>>>>>From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> > >>>>>>
> > >>>>>>Currently, the performance counter registers are saved/restores
> > >>>>>>when the HVM guest switchs VCPUs only if they are running.
> > >>>>>>However, PERF has one check where it writes the MSR and read
> > >>back
> > >>>>>>the value to check if the MSR is working.  This has shown to
> > >>fails
> > >>>>>>the check if the VCPU is moved in between rdmsr and wrmsr and
> > >>>>>>resulting in the values are different.
> > >>>>>Many moons ago (circa 2005) when I used performance counters, I
> > >>>>found
> > >>>>>that adding them to the save/restore path added a non-neligible
> > >>>>>overhead -- something like 5% slow-down.  Do you have any reason
> > >>to
> > >>>>>believe this is no longer the case?  Have you done any benchmarks
> > >>>>>before and after?
> > >>>I was doing some VPMU tracing a couple of weeks ago and by looking
> > >>at
> > >>>trace timestamps I think I saw about 4000 cycles on VPMU save and
> > >>>~9000 cycles on restore. Don't remember what it was percentage-wise
> > >>of
> > >>>a whole context switch.
> > >>>
> > >>>This was on Intel.
> > >>That's a really hefty expense to make all users pay on every context
> > >>switch, on behalf of a random check in a piece of software that only a
> > >>handful of people are going to be actually using.
> > >I believe Linux uses perf infrastructure to implement the watchdog.
> 
> And by default it won't work as for Intel you need these flags:
> 
> cpuid=['0xa:eax=0x07300403,ebx=0x00000004,ecx=0x00000000,edx=0x00000603' ]

This cpuid config variable should not be needed if your CPU is supported in
vmx_vpmu_initialise(), where you added a lot of processors with your patch.
If it is not supported, you should see a message in the Xen logs.

> 
> What we get right now when booting PVHVM under Intel is:
> 
> [    0.160989] Performance Events: unsupported p6 CPU model 45 no PMU driver, software events only.
> [    0.168098] NMI watchdog disabled (cpu0): hardware events not enabled

Did you add vpmu to the xen boot parameter list?

I installed opensuse-12.2 as a HVM guest with xen-unstable running and the kernel
log says:

Mar  7 15:06:18 linux kernel: [    0.183217] CPU0: Intel(R) Core(TM)2 Duo CPU     P8800  @ 2.66GHz stepping 0a
Mar  7 15:06:18 linux kernel: [    0.183980] Performance Events: 4-deep LBR, Core2 events, Intel PMU driver.
Mar  7 15:06:18 linux kernel: [    0.189994] ... version:                2
Mar  7 15:06:18 linux kernel: [    0.189997] ... bit width:              40
Mar  7 15:06:18 linux kernel: [    0.190000] ... generic registers:      2
Mar  7 15:06:18 linux kernel: [    0.190002] ... value mask:             000000ffffffffff
Mar  7 15:06:18 linux kernel: [    0.190005] ... max period:             000000007fffffff
Mar  7 15:06:18 linux kernel: [    0.190008] ... fixed-purpose events:   3
Mar  7 15:06:18 linux kernel: [    0.190011] ... event mask:             0000000700000003
Mar  7 15:06:18 linux kernel: [    0.198203] NMI watchdog: enabled, takes one hw-pmu counter.

When I call perf:

# perf stat ls
acpid             cups      kdm.log        mail.err        news              wtmp            zypper.log
alternatives.log  faillog   krb5           mail.info       ntp               Xorg.0.log
boot.log          firewall  lastlog        mail.warn       pm-powersave.log  Xorg.0.log.old
btmp              hp        localmessages  messages        samba             YaST2
ConsoleKit        journal   mail           NetworkManager  warn              zypp

 Performance counter stats for 'ls':

          7.840869 task-clock                #    0.590 CPUs utilized          
                59 context-switches          #    0.008 M/sec                  
                 0 CPU-migrations            #    0.000 K/sec                  
               304 page-faults               #    0.039 M/sec                  
         6,583,834 cycles                    #    0.840 GHz                     [40.38%]
   <not supported> stalled-cycles-frontend 
   <not supported> stalled-cycles-backend  
         2,168,931 instructions              #    0.33  insns per cycle         [73.20%]
           525,628 branches                  #   67.037 M/sec                   [79.06%]
            27,138 branch-misses             #    5.16% of all branches         [83.55%]

       0.013283672 seconds time elapsed

As you can see performance counters are working for instructions, branches
and branch-misses.

When I call this command in the dom0 it's a bit different:

# perf stat ls
acpid             journal        messages           wpa_supplicant.log
alternatives.log  kdm.log        NetworkManager     wtmp
boot.log          krb5           news               xen
btmp              lastlog        ntp                Xorg.0.log
ConsoleKit        localmessages  pk_backend_zypp    Xorg.0.log.old
cups              mail           pk_backend_zypp-1  YaST2
faillog           mail.err       pm-powersave.log   zypp
firewall          mail.info      samba              zypper.log
hp                mail.warn      warn               zypper.log-20130307.xz

 Performance counter stats for 'ls':

          6.959326 task-clock                #    0.714 CPUs utilized          
                11 context-switches          #    0.002 M/sec                  
                 0 CPU-migrations            #    0.000 K/sec                  
               304 page-faults               #    0.044 M/sec                  
   <not supported> cycles                  
   <not supported> stalled-cycles-frontend 
   <not supported> stalled-cycles-backend  
   <not supported> instructions            
   <not supported> branches                
   <not supported> branch-misses           

       0.009746152 seconds time elapsed

This is because the hardware events are not supported in PV.

Dietmar.


> Unless said above CPUID flag is provided.
> > 
> > Hmm -- well if it is the case that adding performance counters to
> > the vcpu context switch path will add a measurable overhead, then we
> > probably don't want them enabled for typical guests anyway.  If
> > people are actually using the performance counters to measure
> > performance, that makes sense; but for watchdogs it seems like Xen
> > should be able to provide something that is useful for a watchdog
> > without the extra overhead of saving and restoring performance
> > counters.
> > 
> > Konrad, any thoughts?
> 
> The other thing is that there is an Xen watchdog. The one that Jan Beulich
> wrote which should also work under PVHVM:
> 
> drivers/watchdog/xen_wdt.c
> 
> 
> > 
> >  -George

-- 
Company details: http://ts.fujitsu.com/imprint.html

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] Always save/restore performance counters when HVM guest switching VCPU
  2013-03-12  8:18     ` Dietmar Hahn
@ 2013-03-12 15:12       ` Konrad Rzeszutek Wilk
  0 siblings, 0 replies; 16+ messages in thread
From: Konrad Rzeszutek Wilk @ 2013-03-12 15:12 UTC (permalink / raw)
  To: Dietmar Hahn
  Cc: George Dunlap, Boris Ostrovsky, suravee.suthikulpanit@amd.com,
	JBeulich@suse.com, xen-devel

> > > >>>This was on Intel.
> > > >>That's a really hefty expense to make all users pay on every context
> > > >>switch, on behalf of a random check in a piece of software that only a
> > > >>handful of people are going to be actually using.
> > > >I believe Linux uses perf infrastructure to implement the watchdog.


And as mentioned here, this never gets enabled by default b/c to use
the perf infrastructure you need 'vpmu=1' on the bootup line.

> > 
> > And by default it won't work as for Intel you need these flags:
> > 
> > cpuid=['0xa:eax=0x07300403,ebx=0x00000004,ecx=0x00000000,edx=0x00000603' ]
> 
> This cpuid config variable should not be needed if your cpu is supported in
> vmx_vpmu_initialise() where you added a lot of processors with your patch.
> If not supported and you should see a message in the xen logs.

Found out that is b/c I was using 'xend'. With 'xend' some CPUID flags
are cleared, whereas 'xl' does not clear them.
> 
> > 
> > What we get right now when booting PVHVM under Intel is:
> > 
> > [    0.160989] Performance Events: unsupported p6 CPU model 45 no PMU driver, software events only.
> > [    0.168098] NMI watchdog disabled (cpu0): hardware events not enabled
> 
> Did you add vpmu to the xen boot parameter list?

Yes.
> 
> I installed opensuse-12.2 as a HVM guest with xen-unstable running and the kernel
> log says:

With Xen 4.3 and using 'xl' I get it working too. Albeit on that
particular box (model 45) I am hitting some weird crashes, while
on a lesser SandyBridge (model 44) it works OK. Anyhow, that is
a different thread.

> 
> Mar  7 15:06:18 linux kernel: [    0.183217] CPU0: Intel(R) Core(TM)2 Duo CPU     P8800  @ 2.66GHz stepping 0a
> Mar  7 15:06:18 linux kernel: [    0.183980] Performance Events: 4-deep LBR, Core2 events, Intel PMU driver.
> Mar  7 15:06:18 linux kernel: [    0.189994] ... version:                2
> Mar  7 15:06:18 linux kernel: [    0.189997] ... bit width:              40
> Mar  7 15:06:18 linux kernel: [    0.190000] ... generic registers:      2
> Mar  7 15:06:18 linux kernel: [    0.190002] ... value mask:             000000ffffffffff
> Mar  7 15:06:18 linux kernel: [    0.190005] ... max period:             000000007fffffff
> Mar  7 15:06:18 linux kernel: [    0.190008] ... fixed-purpose events:   3
> Mar  7 15:06:18 linux kernel: [    0.190011] ... event mask:             0000000700000003
> Mar  7 15:06:18 linux kernel: [    0.198203] NMI watchdog: enabled, takes one hw-pmu counter.
> 
> When I call perf:
> 
> # perf stat ls
> acpid             cups      kdm.log        mail.err        news              wtmp            zypper.log
> alternatives.log  faillog   krb5           mail.info       ntp               Xorg.0.log
> boot.log          firewall  lastlog        mail.warn       pm-powersave.log  Xorg.0.log.old
> btmp              hp        localmessages  messages        samba             YaST2
> ConsoleKit        journal   mail           NetworkManager  warn              zypp
> 
>  Performance counter stats for 'ls':
> 
>           7.840869 task-clock                #    0.590 CPUs utilized          
>                 59 context-switches          #    0.008 M/sec                  
>                  0 CPU-migrations            #    0.000 K/sec                  
>                304 page-faults               #    0.039 M/sec                  
>          6,583,834 cycles                    #    0.840 GHz                     [40.38%]
>    <not supported> stalled-cycles-frontend 
>    <not supported> stalled-cycles-backend  
>          2,168,931 instructions              #    0.33  insns per cycle         [73.20%]
>            525,628 branches                  #   67.037 M/sec                   [79.06%]
>             27,138 branch-misses             #    5.16% of all branches         [83.55%]
> 
>        0.013283672 seconds time elapsed
> 
> As you can see performance counters are working for instructions, branches
> and branch-misses.
> 
> When I call this command in the dom0 it's a bit different:
> 
> # perf stat ls
> acpid             journal        messages           wpa_supplicant.log
> alternatives.log  kdm.log        NetworkManager     wtmp
> boot.log          krb5           news               xen
> btmp              lastlog        ntp                Xorg.0.log
> ConsoleKit        localmessages  pk_backend_zypp    Xorg.0.log.old
> cups              mail           pk_backend_zypp-1  YaST2
> faillog           mail.err       pm-powersave.log   zypp
> firewall          mail.info      samba              zypper.log
> hp                mail.warn      warn               zypper.log-20130307.xz
> 
>  Performance counter stats for 'ls':
> 
>           6.959326 task-clock                #    0.714 CPUs utilized          
>                 11 context-switches          #    0.002 M/sec                  
>                  0 CPU-migrations            #    0.000 K/sec                  
>                304 page-faults               #    0.044 M/sec                  
>    <not supported> cycles                  
>    <not supported> stalled-cycles-frontend 
>    <not supported> stalled-cycles-backend  
>    <not supported> instructions            
>    <not supported> branches                
>    <not supported> branch-misses           
> 
>        0.009746152 seconds time elapsed
> 
> This is because the hardware events are not supported in PV.

Right.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2013-03-12 15:12 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2013-03-08 15:11 [PATCH] Always save/restore performance counters when HVM guest switching VCPU Boris Ostrovsky
2013-03-11 11:11 ` George Dunlap
2013-03-11 14:53   ` Konrad Rzeszutek Wilk
2013-03-11 14:59     ` George Dunlap
2013-03-11 15:54       ` Boris Ostrovsky
2013-03-11 16:03     ` Jan Beulich
2013-03-12  8:18     ` Dietmar Hahn
2013-03-12 15:12       ` Konrad Rzeszutek Wilk
  -- strict thread matches above, loose matches on Subject: below --
2013-03-08 14:50 Boris Ostrovsky
2013-03-08 14:56 ` George Dunlap
2013-03-08 15:15   ` Jan Beulich
2013-03-01 20:49 suravee.suthikulpanit
2013-03-01 23:02 ` Boris Ostrovsky
2013-03-04 12:42 ` George Dunlap
2013-03-08  8:47   ` Jan Beulich
2013-03-08 22:52     ` Suravee Suthikulanit

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.