* [PATCH 6/9] perf: expose perf capability to other modules.
2011-10-30 16:53 Gleb Natapov
@ 2011-10-30 16:53 ` Gleb Natapov
2011-11-01 10:49 ` Avi Kivity
2011-11-01 15:49 ` David Ahern
0 siblings, 2 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-10-30 16:53 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
KVM needs to know perf capability to decide which PMU it can expose to a
guest.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/include/asm/perf_event.h | 11 +++++++++++
arch/x86/kernel/cpu/perf_event.c | 11 +++++++++++
arch/x86/kernel/cpu/perf_event.h | 2 ++
arch/x86/kernel/cpu/perf_event_intel.c | 3 +++
4 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f61c62f..7d7e57f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -201,7 +201,18 @@ struct perf_guest_switch_msr {
u64 host, guest;
};
+struct x86_pmu_capability {
+ int version;
+ int num_counters_gp;
+ int num_counters_fixed;
+ int bit_width_gp;
+ int bit_width_fixed;
+ unsigned int events_mask;
+ int events_mask_len;
+};
+
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
#else
static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6408910..94ac9ca 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1570,3 +1570,14 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
return misc;
}
+
+void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+ cap->version = x86_pmu.version;
+ cap->num_counters_gp = x86_pmu.num_counters;
+ cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+ cap->bit_width_gp = cap->bit_width_fixed = x86_pmu.cntval_bits;
+ cap->events_mask = x86_pmu.events_mask;
+ cap->events_mask_len = x86_pmu.events_mask_len;
+}
+EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d4..e9ed238 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -259,6 +259,8 @@ struct x86_pmu {
int num_counters_fixed;
int cntval_bits;
u64 cntval_mask;
+ u32 events_mask;
+ int events_mask_len;
int apic;
u64 max_period;
struct event_constraint *
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index e09ca20..64e5f35 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1580,6 +1580,8 @@ __init int intel_pmu_init(void)
x86_pmu.num_counters = eax.split.num_counters;
x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
+ x86_pmu.events_mask = ebx;
+ x86_pmu.events_mask_len = eax.split.mask_length;
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
@@ -1651,6 +1653,7 @@ __init int intel_pmu_init(void)
* architectural event which is often completely bogus:
*/
intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+ x86_pmu.events_mask &= ~0x40;
pr_cont("erratum AAJ80 worked around, ");
}
--
1.7.5.3
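For illustration, a consumer of the new export might look something like
the sketch below. This is not part of the patch; the function name and
the pr_info() reporting are made up for the example.

#include <linux/kernel.h>
#include <asm/perf_event.h>

static void example_report_pmu(void)
{
	struct x86_pmu_capability cap;

	perf_get_x86_pmu_capability(&cap);

	/* version == 0 means no architectural PMU on this host */
	if (!cap.version)
		return;

	pr_info("PMU v%d: %d GP counters (%d bit), %d fixed (%d bit)\n",
		cap.version, cap.num_counters_gp, cap.bit_width_gp,
		cap.num_counters_fixed, cap.bit_width_fixed);
}

The real consumer is the KVM CPUID code in patch 7/9 of this series,
which packs these fields into CPUID leaf 0xA.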
* Re: [PATCH 6/9] perf: expose perf capability to other modules.
2011-10-30 16:53 ` [PATCH 6/9] perf: expose perf capability to other modules Gleb Natapov
@ 2011-11-01 10:49 ` Avi Kivity
2011-11-01 15:49 ` David Ahern
1 sibling, 0 replies; 22+ messages in thread
From: Avi Kivity @ 2011-11-01 10:49 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm, mtosatti, mingo, a.p.zijlstra, acme
On 10/30/2011 06:53 PM, Gleb Natapov wrote:
> KVM needs to know perf capability to decide which PMU it can expose to a
> guest.
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
> arch/x86/include/asm/perf_event.h | 11 +++++++++++
> arch/x86/kernel/cpu/perf_event.c | 11 +++++++++++
> arch/x86/kernel/cpu/perf_event.h | 2 ++
> arch/x86/kernel/cpu/perf_event_intel.c | 3 +++
> 4 files changed, 27 insertions(+), 0 deletions(-)
>
>
Peter, can you please review this, and if all is well, either apply or ack?
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
* Re: [PATCH 6/9] perf: expose perf capability to other modules.
2011-10-30 16:53 ` [PATCH 6/9] perf: expose perf capability to other modules Gleb Natapov
2011-11-01 10:49 ` Avi Kivity
@ 2011-11-01 15:49 ` David Ahern
2011-11-01 16:13 ` Gleb Natapov
1 sibling, 1 reply; 22+ messages in thread
From: David Ahern @ 2011-11-01 15:49 UTC (permalink / raw)
To: Gleb Natapov; +Cc: kvm, avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
On 10/30/2011 10:53 AM, Gleb Natapov wrote:
> KVM needs to know perf capability to decide which PMU it can expose to a
> guest.
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
> arch/x86/include/asm/perf_event.h | 11 +++++++++++
> arch/x86/kernel/cpu/perf_event.c | 11 +++++++++++
> arch/x86/kernel/cpu/perf_event.h | 2 ++
> arch/x86/kernel/cpu/perf_event_intel.c | 3 +++
> 4 files changed, 27 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index f61c62f..7d7e57f 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -201,7 +201,18 @@ struct perf_guest_switch_msr {
> u64 host, guest;
> };
>
> +struct x86_pmu_capability {
> + int version;
> + int num_counters_gp;
> + int num_counters_fixed;
> + int bit_width_gp;
> + int bit_width_fixed;
> + unsigned int events_mask;
> + int events_mask_len;
> +};
> +
> extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
> +extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
> #else
> static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
> {
What about a version of perf_get_x86_pmu_capability for when
CONFIG_PERF_EVENTS is not enabled in the host kernel? The next KVM patch
assumes the function is defined.
David
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 6408910..94ac9ca 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1570,3 +1570,14 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
>
> return misc;
> }
> +
> +void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
> +{
> + cap->version = x86_pmu.version;
> + cap->num_counters_gp = x86_pmu.num_counters;
> + cap->num_counters_fixed = x86_pmu.num_counters_fixed;
> + cap->bit_width_gp = cap->bit_width_fixed = x86_pmu.cntval_bits;
> + cap->events_mask = x86_pmu.events_mask;
> + cap->events_mask_len = x86_pmu.events_mask_len;
> +}
> +EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index b9698d4..e9ed238 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -259,6 +259,8 @@ struct x86_pmu {
> int num_counters_fixed;
> int cntval_bits;
> u64 cntval_mask;
> + u32 events_mask;
> + int events_mask_len;
> int apic;
> u64 max_period;
> struct event_constraint *
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index e09ca20..64e5f35 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1580,6 +1580,8 @@ __init int intel_pmu_init(void)
> x86_pmu.num_counters = eax.split.num_counters;
> x86_pmu.cntval_bits = eax.split.bit_width;
> x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
> + x86_pmu.events_mask = ebx;
> + x86_pmu.events_mask_len = eax.split.mask_length;
>
> /*
> * Quirk: v2 perfmon does not report fixed-purpose events, so
> @@ -1651,6 +1653,7 @@ __init int intel_pmu_init(void)
> * architectural event which is often completely bogus:
> */
> intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
> + x86_pmu.events_mask &= ~0x40;
>
> pr_cont("erratum AAJ80 worked around, ");
> }
* Re: [PATCH 6/9] perf: expose perf capability to other modules.
2011-11-01 15:49 ` David Ahern
@ 2011-11-01 16:13 ` Gleb Natapov
2011-11-01 16:20 ` David Ahern
0 siblings, 1 reply; 22+ messages in thread
From: Gleb Natapov @ 2011-11-01 16:13 UTC (permalink / raw)
To: David Ahern; +Cc: kvm, avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
On Tue, Nov 01, 2011 at 09:49:19AM -0600, David Ahern wrote:
> On 10/30/2011 10:53 AM, Gleb Natapov wrote:
> > KVM needs to know perf capability to decide which PMU it can expose to a
> > guest.
> >
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> > arch/x86/include/asm/perf_event.h | 11 +++++++++++
> > arch/x86/kernel/cpu/perf_event.c | 11 +++++++++++
> > arch/x86/kernel/cpu/perf_event.h | 2 ++
> > arch/x86/kernel/cpu/perf_event_intel.c | 3 +++
> > 4 files changed, 27 insertions(+), 0 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> > index f61c62f..7d7e57f 100644
> > --- a/arch/x86/include/asm/perf_event.h
> > +++ b/arch/x86/include/asm/perf_event.h
> > @@ -201,7 +201,18 @@ struct perf_guest_switch_msr {
> > u64 host, guest;
> > };
> >
> > +struct x86_pmu_capability {
> > + int version;
> > + int num_counters_gp;
> > + int num_counters_fixed;
> > + int bit_width_gp;
> > + int bit_width_fixed;
> > + unsigned int events_mask;
> > + int events_mask_len;
> > +};
> > +
> > extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
> > +extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
> > #else
> > static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
> > {
>
> What about a version of perf_get_x86_pmu_capability for when
> CONFIG_PERF_EVENTS is not enabled in the host kernel? The next KVM patch
> assumes the function is defined.
>
As far as I understand, it is not possible to build x86 without
CONFIG_PERF_EVENTS right now. Actually, the kvm pmu code depends on
CONFIG_PERF_EVENTS being enabled. I can easily provide the stub if
needed, though.
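Something along these lines, mirroring the existing #else stub next to
perf_guest_get_msrs() in perf_event.h (an untested sketch; it assumes
memset is visible at that point in the header):

static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
{
	/* no perf: report no architectural PMU, so KVM exposes none */
	memset(cap, 0, sizeof(*cap));
}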
> David
>
>
> > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> > index 6408910..94ac9ca 100644
> > --- a/arch/x86/kernel/cpu/perf_event.c
> > +++ b/arch/x86/kernel/cpu/perf_event.c
> > @@ -1570,3 +1570,14 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
> >
> > return misc;
> > }
> > +
> > +void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
> > +{
> > + cap->version = x86_pmu.version;
> > + cap->num_counters_gp = x86_pmu.num_counters;
> > + cap->num_counters_fixed = x86_pmu.num_counters_fixed;
> > + cap->bit_width_gp = cap->bit_width_fixed = x86_pmu.cntval_bits;
> > + cap->events_mask = x86_pmu.events_mask;
> > + cap->events_mask_len = x86_pmu.events_mask_len;
> > +}
> > +EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
> > diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> > index b9698d4..e9ed238 100644
> > --- a/arch/x86/kernel/cpu/perf_event.h
> > +++ b/arch/x86/kernel/cpu/perf_event.h
> > @@ -259,6 +259,8 @@ struct x86_pmu {
> > int num_counters_fixed;
> > int cntval_bits;
> > u64 cntval_mask;
> > + u32 events_mask;
> > + int events_mask_len;
> > int apic;
> > u64 max_period;
> > struct event_constraint *
> > diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> > index e09ca20..64e5f35 100644
> > --- a/arch/x86/kernel/cpu/perf_event_intel.c
> > +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> > @@ -1580,6 +1580,8 @@ __init int intel_pmu_init(void)
> > x86_pmu.num_counters = eax.split.num_counters;
> > x86_pmu.cntval_bits = eax.split.bit_width;
> > x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
> > + x86_pmu.events_mask = ebx;
> > + x86_pmu.events_mask_len = eax.split.mask_length;
> >
> > /*
> > * Quirk: v2 perfmon does not report fixed-purpose events, so
> > @@ -1651,6 +1653,7 @@ __init int intel_pmu_init(void)
> > * architectural event which is often completely bogus:
> > */
> > intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
> > + x86_pmu.events_mask &= ~0x40;
> >
> > pr_cont("erratum AAJ80 worked around, ");
> > }
--
Gleb.
* Re: [PATCH 6/9] perf: expose perf capability to other modules.
2011-11-01 16:13 ` Gleb Natapov
@ 2011-11-01 16:20 ` David Ahern
2011-11-01 16:41 ` Gleb Natapov
2011-11-02 7:42 ` Frederic Weisbecker
0 siblings, 2 replies; 22+ messages in thread
From: David Ahern @ 2011-11-01 16:20 UTC (permalink / raw)
To: Gleb Natapov
Cc: kvm, avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme,
Frederic Weisbecker
On 11/01/2011 10:13 AM, Gleb Natapov wrote:
> On Tue, Nov 01, 2011 at 09:49:19AM -0600, David Ahern wrote:
>> On 10/30/2011 10:53 AM, Gleb Natapov wrote:
>>> KVM needs to know perf capability to decide which PMU it can expose to a
>>> guest.
>>>
>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>> ---
>>> arch/x86/include/asm/perf_event.h | 11 +++++++++++
>>> arch/x86/kernel/cpu/perf_event.c | 11 +++++++++++
>>> arch/x86/kernel/cpu/perf_event.h | 2 ++
>>> arch/x86/kernel/cpu/perf_event_intel.c | 3 +++
>>> 4 files changed, 27 insertions(+), 0 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>>> index f61c62f..7d7e57f 100644
>>> --- a/arch/x86/include/asm/perf_event.h
>>> +++ b/arch/x86/include/asm/perf_event.h
>>> @@ -201,7 +201,18 @@ struct perf_guest_switch_msr {
>>> u64 host, guest;
>>> };
>>>
>>> +struct x86_pmu_capability {
>>> + int version;
>>> + int num_counters_gp;
>>> + int num_counters_fixed;
>>> + int bit_width_gp;
>>> + int bit_width_fixed;
>>> + unsigned int events_mask;
>>> + int events_mask_len;
>>> +};
>>> +
>>> extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
>>> +extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
>>> #else
>>> static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
>>> {
>>
>> What about a version of perf_get_x86_pmu_capability for when
>> CONFIG_PERF_EVENTS is not enabled in the host kernel? The next KVM patch
>> assumes the function is defined.
>>
> As far as I understand, it is not possible to build x86 without
> CONFIG_PERF_EVENTS right now. Actually, the kvm pmu code depends on
> CONFIG_PERF_EVENTS being enabled. I can easily provide the stub if
> needed, though.
Right. Originally it could be enabled/disabled. Right now it cannot be,
but I believe Frederic is working on making it configurable again.
David
* Re: [PATCH 6/9] perf: expose perf capability to other modules.
2011-11-01 16:20 ` David Ahern
@ 2011-11-01 16:41 ` Gleb Natapov
2011-11-02 7:42 ` Frederic Weisbecker
1 sibling, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-01 16:41 UTC (permalink / raw)
To: David Ahern
Cc: kvm, avi, mtosatti, mingo, a.p.zijlstra, acme,
Frederic Weisbecker
On Tue, Nov 01, 2011 at 10:20:04AM -0600, David Ahern wrote:
>
>
> On 11/01/2011 10:13 AM, Gleb Natapov wrote:
> > On Tue, Nov 01, 2011 at 09:49:19AM -0600, David Ahern wrote:
> >> On 10/30/2011 10:53 AM, Gleb Natapov wrote:
> >>> KVM needs to know perf capability to decide which PMU it can expose to a
> >>> guest.
> >>>
> >>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> >>> ---
> >>> arch/x86/include/asm/perf_event.h | 11 +++++++++++
> >>> arch/x86/kernel/cpu/perf_event.c | 11 +++++++++++
> >>> arch/x86/kernel/cpu/perf_event.h | 2 ++
> >>> arch/x86/kernel/cpu/perf_event_intel.c | 3 +++
> >>> 4 files changed, 27 insertions(+), 0 deletions(-)
> >>>
> >>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> >>> index f61c62f..7d7e57f 100644
> >>> --- a/arch/x86/include/asm/perf_event.h
> >>> +++ b/arch/x86/include/asm/perf_event.h
> >>> @@ -201,7 +201,18 @@ struct perf_guest_switch_msr {
> >>> u64 host, guest;
> >>> };
> >>>
> >>> +struct x86_pmu_capability {
> >>> + int version;
> >>> + int num_counters_gp;
> >>> + int num_counters_fixed;
> >>> + int bit_width_gp;
> >>> + int bit_width_fixed;
> >>> + unsigned int events_mask;
> >>> + int events_mask_len;
> >>> +};
> >>> +
> >>> extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
> >>> +extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
> >>> #else
> >>> static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
> >>> {
> >>
> >> What about a version of perf_get_x86_pmu_capability for when
> >> CONFIG_PERF_EVENTS is not enabled in the host kernel? The next KVM patch
> >> assumes the function is defined.
> >>
> > As far as I understand, it is not possible to build x86 without
> > CONFIG_PERF_EVENTS right now. Actually, the kvm pmu code depends on
> > CONFIG_PERF_EVENTS being enabled. I can easily provide the stub if
> > needed, though.
>
> Right. Originally it could be enabled/disabled. Right now it cannot be,
> but I believe Frederic is working on making it configurable again.
>
OK, I'll provide a stub function in the next version, but I will not be
able to test it :)
--
Gleb.
* Re: [PATCH 6/9] perf: expose perf capability to other modules.
2011-11-01 16:20 ` David Ahern
2011-11-01 16:41 ` Gleb Natapov
@ 2011-11-02 7:42 ` Frederic Weisbecker
2011-11-07 14:45 ` Will Deacon
1 sibling, 1 reply; 22+ messages in thread
From: Frederic Weisbecker @ 2011-11-02 7:42 UTC (permalink / raw)
To: David Ahern
Cc: Gleb Natapov, kvm, avi, mtosatti, linux-kernel, mingo,
a.p.zijlstra, acme, Will Deacon
On Tue, Nov 01, 2011 at 10:20:04AM -0600, David Ahern wrote:
>
>
> On 11/01/2011 10:13 AM, Gleb Natapov wrote:
> > On Tue, Nov 01, 2011 at 09:49:19AM -0600, David Ahern wrote:
> >> On 10/30/2011 10:53 AM, Gleb Natapov wrote:
> >>> KVM needs to know perf capability to decide which PMU it can expose to a
> >>> guest.
> >>>
> >>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> >>> ---
> >>> arch/x86/include/asm/perf_event.h | 11 +++++++++++
> >>> arch/x86/kernel/cpu/perf_event.c | 11 +++++++++++
> >>> arch/x86/kernel/cpu/perf_event.h | 2 ++
> >>> arch/x86/kernel/cpu/perf_event_intel.c | 3 +++
> >>> 4 files changed, 27 insertions(+), 0 deletions(-)
> >>>
> >>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> >>> index f61c62f..7d7e57f 100644
> >>> --- a/arch/x86/include/asm/perf_event.h
> >>> +++ b/arch/x86/include/asm/perf_event.h
> >>> @@ -201,7 +201,18 @@ struct perf_guest_switch_msr {
> >>> u64 host, guest;
> >>> };
> >>>
> >>> +struct x86_pmu_capability {
> >>> + int version;
> >>> + int num_counters_gp;
> >>> + int num_counters_fixed;
> >>> + int bit_width_gp;
> >>> + int bit_width_fixed;
> >>> + unsigned int events_mask;
> >>> + int events_mask_len;
> >>> +};
> >>> +
> >>> extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
> >>> +extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
> >>> #else
> >>> static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
> >>> {
> >>
> >> What about a version of perf_get_x86_pmu_capability for when
> >> CONFIG_PERF_EVENTS is not enabled in the host kernel? The next KVM patch
> >> assumes the function is defined.
> >>
> > As far as I understand, it is not possible to build x86 without
> > CONFIG_PERF_EVENTS right now. Actually, the kvm pmu code depends on
> > CONFIG_PERF_EVENTS being enabled. I can easily provide the stub if
> > needed, though.
>
> Right. Originally it could be enabled/disabled. Right now it cannot be,
> but I believe Frederic is working on making it configurable again.
>
> David
Yep. Will Deacon is working on making the breakpoints able to process
pure arch information (i.e. without being forced to use the perf attr
as a midlayer to define them).
Once we have that I can separate the breakpoints implementation from
perf and make it optional.
* [PATCH 0/9] KVM in-guest performance monitoring
@ 2011-11-03 12:31 Gleb Natapov
2011-11-03 12:31 ` [PATCH 1/9] KVM: Expose kvm_lapic_local_deliver() Gleb Natapov
` (9 more replies)
0 siblings, 10 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-03 12:31 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
This patchset exposes an emulated version 2 architectural performance
monitoring unit to KVM guests. The PMU is emulated using perf_events,
so the host kernel can multiplex host-wide, host-user, and guest events
on the available resources.
The patches should be applied on top of KVM patches from the patch series
"[PATCH v2 0/9] perf support for x86 guest/host-only bits" [1]
If you want to try running perf in a guest you need to apply the patch
below to qemu-kvm and use -cpu host on the qemu command line. But DO NOT
TRY those patches without applying [2][3] to the host kernel first.
Don't say I didn't warn you!
[1] https://lkml.org/lkml/2011/10/5/153
[2] https://lkml.org/lkml/2011/10/18/390
[3] https://lkml.org/lkml/2011/10/23/163
Avi Kivity (8):
KVM: Expose kvm_lapic_local_deliver()
KVM: Expose a version 2 architectural PMU to guests
KVM: Add generic RDPMC support
KVM: SVM: Intercept RDPMC
KVM: VMX: Intercept RDPMC
KVM: Expose the architectural performance monitoring CPUID leaf
KVM: x86 emulator: fix RDPMC privilege check
KVM: x86 emulator: implement RDPMC (0F 33)
Gleb Natapov (1):
perf: expose perf capability to other modules.
arch/x86/include/asm/kvm_emulate.h | 1 +
arch/x86/include/asm/kvm_host.h | 44 +++
arch/x86/include/asm/perf_event.h | 11 +
arch/x86/kernel/cpu/perf_event.c | 11 +
arch/x86/kernel/cpu/perf_event.h | 2 +
arch/x86/kernel/cpu/perf_event_intel.c | 3 +
arch/x86/kvm/Makefile | 2 +-
arch/x86/kvm/emulate.c | 13 +-
arch/x86/kvm/lapic.c | 2 +-
arch/x86/kvm/lapic.h | 1 +
arch/x86/kvm/pmu.c | 513 ++++++++++++++++++++++++++++++++
arch/x86/kvm/svm.c | 15 +
arch/x86/kvm/vmx.c | 15 +-
arch/x86/kvm/x86.c | 65 ++++-
include/linux/kvm_host.h | 1 +
15 files changed, 686 insertions(+), 13 deletions(-)
create mode 100644 arch/x86/kvm/pmu.c
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index f179999..ff2a0ca 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -1178,11 +1178,20 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
*edx = 0;
break;
case 0xA:
- /* Architectural Performance Monitoring Leaf */
- *eax = 0;
- *ebx = 0;
- *ecx = 0;
- *edx = 0;
+ if (kvm_enabled()) {
+ KVMState *s = env->kvm_state;
+
+ *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
+ *ebx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EBX);
+ *ecx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_ECX);
+ *edx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EDX);
+ } else {
+ /* Architectural Performance Monitoring Leaf */
+ *eax = 0; //0x07280402;
+ *ebx = 0;
+ *ecx = 0;
+ *edx = 0; //0x00000503;
+ }
break;
case 0xD:
/* Processor Extended State */
--
1.7.5.3
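Once the qemu patch above is applied, the leaf the guest sees can be
inspected from inside the guest with a small program along these lines
(an illustrative sketch, not part of the series; it relies on gcc's
cpuid.h helper):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* CPUID leaf 0xA: architectural performance monitoring */
	if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
		return 1;

	printf("PMU version:    %u\n", eax & 0xff);
	printf("GP counters:    %u x %u bit\n",
	       (eax >> 8) & 0xff, (eax >> 16) & 0xff);
	printf("fixed counters: %u x %u bit\n",
	       edx & 0x1f, (edx >> 5) & 0xff);
	return 0;
}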
* [PATCH 1/9] KVM: Expose kvm_lapic_local_deliver()
2011-11-03 12:31 [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
@ 2011-11-03 12:31 ` Gleb Natapov
2011-11-03 12:31 ` [PATCH 2/9] KVM: Expose a version 2 architectural PMU to guests Gleb Natapov
` (8 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-03 12:31 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
From: Avi Kivity <avi@redhat.com>
Needed to deliver performance monitoring interrupts.
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/kvm/lapic.c | 2 +-
arch/x86/kvm/lapic.h | 1 +
2 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 54abb40..e87e43e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1120,7 +1120,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
return 0;
}
-static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
+int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
{
u32 reg = apic_get_reg(apic, lvt_type);
int vector, mode, trig_mode;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 138e8cc..6f4ce25 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -34,6 +34,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
+int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
--
1.7.5.3
* [PATCH 2/9] KVM: Expose a version 2 architectural PMU to guests
2011-11-03 12:31 [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
2011-11-03 12:31 ` [PATCH 1/9] KVM: Expose kvm_lapic_local_deliver() Gleb Natapov
@ 2011-11-03 12:31 ` Gleb Natapov
2011-11-03 12:31 ` [PATCH 3/9] KVM: Add generic RDPMC support Gleb Natapov
` (7 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-03 12:31 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
From: Avi Kivity <avi@redhat.com>
Use perf_events to emulate an architectural PMU, version 2.
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 43 ++++
arch/x86/kvm/Makefile | 2 +-
arch/x86/kvm/pmu.c | 513 +++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 20 +-
include/linux/kvm_host.h | 1 +
5 files changed, 570 insertions(+), 9 deletions(-)
create mode 100644 arch/x86/kvm/pmu.c
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c1f19de..53caa94 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -20,6 +20,7 @@
#include <linux/kvm.h>
#include <linux/kvm_para.h>
#include <linux/kvm_types.h>
+#include <linux/perf_event.h>
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
@@ -296,6 +297,36 @@ struct kvm_mmu {
u64 pdptrs[4]; /* pae */
};
+enum pmc_type {
+ KVM_PMC_GP,
+ KVM_PMC_FIXED,
+};
+
+struct kvm_pmc {
+ enum pmc_type type;
+ u64 counter;
+ u64 eventsel;
+ struct perf_event *perf_event;
+ struct kvm_vcpu *vcpu;
+};
+
+struct kvm_pmu {
+ unsigned nr_arch_gp_counters;
+ unsigned nr_arch_fixed_counters;
+ unsigned available_event_types;
+ u64 fixed_ctr_ctrl;
+ u64 global_ctrl;
+ u64 global_status;
+ u64 global_ovf_ctrl;
+ u64 gp_counter_bitmask;
+ u64 fixed_counter_bitmask;
+ u64 global_ctrl_mask;
+ u8 version;
+ struct kvm_pmc gp_counters[X86_PMC_MAX_GENERIC];
+ struct kvm_pmc fixed_counters[X86_PMC_MAX_FIXED];
+ u64 reprogram_pmi;
+};
+
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -433,6 +464,8 @@ struct kvm_vcpu_arch {
unsigned access;
gfn_t mmio_gfn;
+ struct kvm_pmu pmu;
+
/* used for guest single stepping over the given code position */
unsigned long singlestep_rip;
@@ -894,4 +927,14 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
+void kvm_pmu_init(struct kvm_vcpu *vcpu);
+void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
+void kvm_pmu_reset(struct kvm_vcpu *vcpu);
+void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
+bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
+int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
+void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index f15501f..cfca03f 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o)
kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o)
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
- i8254.o timer.o
+ i8254.o timer.o pmu.o
kvm-intel-y += vmx.o
kvm-amd-y += svm.o
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
new file mode 100644
index 0000000..66b64a7
--- /dev/null
+++ b/arch/x86/kvm/pmu.c
@@ -0,0 +1,513 @@
+/*
+ * Kernel-based Virtual Machine -- Performance Monitoring Unit support
+ *
+ * Copyright 2011 Red Hat, Inc. and/or its affiliates.
+ *
+ * Authors:
+ * Avi Kivity <avi@redhat.com>
+ * Gleb Natapov <gleb@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kvm_host.h>
+#include <linux/perf_event.h>
+#include "x86.h"
+#include "lapic.h"
+
+static struct kvm_arch_event_perf_mapping {
+ u8 eventsel;
+ u8 unit_mask;
+ unsigned event_type;
+ bool inexact;
+} arch_events[] = {
+ /* Index must match CPUID 0x0A.EBX bit vector */
+ [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
+ [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
+ [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES },
+ [3] = { 0x2e, 0x4f, PERF_COUNT_HW_CACHE_REFERENCES },
+ [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
+ [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+ [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+};
+
+/* mapping between fixed pmc index and arch_events array */
+int fixed_pmc_events[] = {1, 0, 2};
+
+static bool pmc_is_gp(struct kvm_pmc *pmc)
+{
+ return pmc->type == KVM_PMC_GP;
+}
+
+static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+
+ return pmc_is_gp(pmc) ? pmu->gp_counter_bitmask :
+ pmu->fixed_counter_bitmask;
+}
+
+static inline int pmc_to_global_idx(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+ struct kvm_pmc *counters;
+ int shift;
+
+ if (pmc_is_gp(pmc)) {
+ counters = pmu->gp_counters;
+ shift = X86_PMC_IDX_GENERIC;
+ } else {
+ counters = pmu->fixed_counters;
+ shift = X86_PMC_IDX_FIXED;
+ }
+
+ return pmc - counters + shift;
+}
+
+static inline bool pmc_enabled(struct kvm_pmc *pmc)
+{
+ struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+ return test_bit(pmc_to_global_idx(pmc),
+ (unsigned long *)&pmu->global_ctrl);
+}
+
+static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
+ u32 base)
+{
+ if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
+ return &pmu->gp_counters[msr - base];
+ return NULL;
+}
+
+static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
+{
+ int base = MSR_CORE_PERF_FIXED_CTR0;
+ if (msr >= base && msr < base + pmu->nr_arch_fixed_counters)
+ return &pmu->fixed_counters[msr - base];
+ return NULL;
+}
+
+static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
+{
+ if (idx < X86_PMC_IDX_FIXED)
+ return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + idx, MSR_P6_EVNTSEL0);
+ else
+ return get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + idx -
+ X86_PMC_IDX_FIXED);
+}
+
+static void kvm_perf_overflow(struct perf_event *perf_event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+ struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+ __set_bit(pmc_to_global_idx(pmc),
+ (unsigned long *)&pmu->global_status);
+}
+
+static void kvm_perf_overflow_intr(struct perf_event *perf_event,
+ struct perf_sample_data *data, struct pt_regs *regs)
+{
+ struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+ struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
+ if (!__test_and_set_bit(pmc_to_global_idx(pmc),
+ (unsigned long *)&pmu->reprogram_pmi)) {
+ kvm_perf_overflow(perf_event, data, regs);
+ kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+ }
+}
+
+static u64 read_pmc(struct kvm_pmc *pmc)
+{
+ u64 counter, enabled, running;
+
+ counter = pmc->counter;
+
+ if (pmc->perf_event)
+ counter += perf_event_read_value(pmc->perf_event,
+ &enabled, &running);
+
+ /* FIXME: Scaling needed? */
+
+ return counter & pmc_bitmask(pmc);
+}
+
+static void stop_counter(struct kvm_pmc *pmc)
+{
+ if (pmc->perf_event) {
+ pmc->counter = read_pmc(pmc);
+ perf_event_release_kernel(pmc->perf_event);
+ pmc->perf_event = NULL;
+ }
+}
+
+static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
+ unsigned config, bool exclude_user, bool exclude_kernel,
+ bool intr)
+{
+ struct perf_event *event;
+ struct perf_event_attr attr = {
+ .type = type,
+ .size = sizeof(attr),
+ .exclude_idle = true,
+ .exclude_host = 1,
+ .exclude_user = exclude_user,
+ .exclude_kernel = exclude_kernel,
+ .sample_period = (-pmc->counter) & pmc_bitmask(pmc),
+ .config = config,
+ };
+
+ event = perf_event_create_kernel_counter(&attr, -1, current,
+ intr ? kvm_perf_overflow_intr :
+ kvm_perf_overflow, pmc);
+ if (IS_ERR(event)) {
+ printk_once("kvm: pmu event creation failed %ld\n",
+ PTR_ERR(event));
+ return;
+ }
+
+ pmc->perf_event = event;
+ __clear_bit(pmc_to_global_idx(pmc),
+ (unsigned long *)&pmc->vcpu->arch.pmu.reprogram_pmi);
+}
+
+static unsigned find_arch_event(struct kvm_pmu *pmu, u8 event_select,
+ u8 unit_mask)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(arch_events); i++)
+ if (arch_events[i].eventsel == event_select
+ && arch_events[i].unit_mask == unit_mask
+ && (pmu->available_event_types & (1 << i)))
+ break;
+
+ if (i == ARRAY_SIZE(arch_events))
+ return PERF_COUNT_HW_MAX;
+
+ return arch_events[i].event_type;
+}
+
+static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
+{
+ unsigned config, type = PERF_TYPE_RAW;
+ u8 event_select, unit_mask;
+
+ pmc->eventsel = eventsel;
+
+ stop_counter(pmc);
+
+ if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_enabled(pmc))
+ return;
+
+ event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
+ unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+
+ if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE |
+ ARCH_PERFMON_EVENTSEL_INV |
+ ARCH_PERFMON_EVENTSEL_CMASK))) {
+ config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
+ unit_mask);
+ if (config != PERF_COUNT_HW_MAX)
+ type = PERF_TYPE_HARDWARE;
+ }
+
+ if (type == PERF_TYPE_RAW)
+ config = eventsel & X86_RAW_EVENT_MASK;
+
+ reprogram_counter(pmc, type, config,
+ !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+ !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+ eventsel & ARCH_PERFMON_EVENTSEL_INT);
+}
+
+static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
+{
+ unsigned en = en_pmi & 0x3;
+ bool pmi = en_pmi & 0x8;
+
+ stop_counter(pmc);
+
+ if (!en || !pmc_enabled(pmc))
+ return;
+
+ reprogram_counter(pmc, PERF_TYPE_HARDWARE,
+ arch_events[fixed_pmc_events[idx]].event_type,
+ !(en & 0x2), /* exclude user */
+ !(en & 0x1), /* exclude kernel */
+ pmi);
+}
+
+#define FIXED_EN_PMI(R, I) (((R) >> ((I) * 4)) & 0xf)
+
+static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
+{
+ int i;
+
+ for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
+ u8 en_pmi = FIXED_EN_PMI(data, i);
+ struct kvm_pmc *pmc = get_fixed_pmc(pmu,
+ MSR_CORE_PERF_FIXED_CTR0 + i);
+
+ if (FIXED_EN_PMI(pmu->fixed_ctr_ctrl, i) == en_pmi)
+ continue;
+
+ reprogram_fixed_counter(pmc, en_pmi, i);
+ }
+
+ pmu->fixed_ctr_ctrl = data;
+}
+
+static void reprogram_idx(struct kvm_pmu *pmu, int idx)
+{
+ struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);
+
+ if (!pmc)
+ return;
+
+ if (pmc_is_gp(pmc))
+ reprogram_gp_counter(pmc, pmc->eventsel);
+ else {
+ int fidx = idx - X86_PMC_IDX_FIXED;
+ reprogram_fixed_counter(pmc,
+ FIXED_EN_PMI(pmu->fixed_ctr_ctrl, fidx), fidx);
+ }
+}
+
+static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data)
+{
+ int bit;
+ u64 diff = pmu->global_ctrl ^ data;
+
+ pmu->global_ctrl = data;
+
+ for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
+ reprogram_idx(pmu, bit);
+}
+
+bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
+{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ int ret;
+
+ switch (msr) {
+ case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ ret = pmu->version > 1;
+ break;
+ default:
+ ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)
+ || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0)
+ || get_fixed_pmc(pmu, msr);
+ break;
+ }
+ return ret;
+}
+
+int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
+{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ struct kvm_pmc *pmc;
+
+ switch (index) {
+ case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ *data = pmu->fixed_ctr_ctrl;
+ return 0;
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ *data = pmu->global_status;
+ return 0;
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ *data = pmu->global_ctrl;
+ return 0;
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ *data = pmu->global_ovf_ctrl;
+ return 0;
+ default:
+ if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
+ (pmc = get_fixed_pmc(pmu, index))) {
+ *data = read_pmc(pmc);
+ return 0;
+ } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
+ *data = pmc->eventsel;
+ return 0;
+ }
+ }
+ return 1;
+}
+
+int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
+{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ struct kvm_pmc *pmc;
+
+ switch (index) {
+ case MSR_CORE_PERF_FIXED_CTR_CTRL:
+ if (pmu->fixed_ctr_ctrl == data)
+ return 0;
+ if (!(data & 0xfffffffffffff444)) {
+ reprogram_fixed_counters(pmu, data);
+ return 0;
+ }
+ break;
+ case MSR_CORE_PERF_GLOBAL_STATUS:
+ break; /* RO MSR */
+ case MSR_CORE_PERF_GLOBAL_CTRL:
+ if (pmu->global_ctrl == data)
+ return 0;
+ if (!(data & pmu->global_ctrl_mask)) {
+ global_ctrl_changed(pmu, data);
+ return 0;
+ }
+ break;
+ case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+ if (!(data & (pmu->global_ctrl_mask & ~(3ull<<62)))) {
+ pmu->global_status &= ~data;
+ pmu->global_ovf_ctrl = data;
+ return 0;
+ }
+ break;
+ default:
+ if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
+ (pmc = get_fixed_pmc(pmu, index))) {
+ data = (s64)(s32)data;
+ pmc->counter += data - read_pmc(pmc);
+ return 0;
+ } else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
+ if (data == pmc->eventsel)
+ return 0;
+ if (!(data & 0xffffffff00200000ull)) {
+ reprogram_gp_counter(pmc, data);
+ return 0;
+ }
+ }
+ }
+ return 1;
+}
+
+int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
+{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ bool fast_mode = pmc & (1u << 31);
+ bool fixed = pmc & (1u << 30);
+ struct kvm_pmc *counters;
+ u64 ctr;
+
+ pmc &= (3u << 30) - 1;
+ if (!fixed && pmc >= pmu->nr_arch_gp_counters)
+ return 1;
+ if (fixed && pmc >= pmu->nr_arch_fixed_counters)
+ return 1;
+ counters = fixed ? pmu->fixed_counters : pmu->gp_counters;
+ ctr = read_pmc(&counters[pmc]);
+ if (fast_mode)
+ ctr = (u32)ctr;
+ *data = ctr;
+
+ return 0;
+}
+
+void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ struct kvm_cpuid_entry2 *entry;
+ unsigned bitmap_len;
+
+ pmu->nr_arch_gp_counters = 0;
+ pmu->nr_arch_fixed_counters = 0;
+ pmu->fixed_counter_bitmask = 0;
+ pmu->version = 0;
+
+ entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+ if (!entry)
+ return;
+
+ pmu->version = entry->eax & 0xff;
+ if (!pmu->version)
+ return;
+
+ pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
+ X86_PMC_MAX_GENERIC);
+ pmu->gp_counter_bitmask = ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
+ bitmap_len = (entry->eax >> 24) & 0xff;
+ pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
+
+ if (pmu->version > 1) {
+ pmu->nr_arch_fixed_counters = min((int)(entry->edx) & 0x1f,
+ X86_PMC_MAX_FIXED);
+ pmu->fixed_counter_bitmask =
+ ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
+ pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1)
+ | (((1ull << pmu->nr_arch_fixed_counters) - 1)
+ << X86_PMC_IDX_FIXED));
+ } else
+ pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1;
+}
+
+void kvm_pmu_init(struct kvm_vcpu *vcpu)
+{
+ int i;
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+
+ memset(pmu, 0, sizeof(*pmu));
+ for (i = 0; i < X86_PMC_MAX_GENERIC; i++) {
+ pmu->gp_counters[i].type = KVM_PMC_GP;
+ pmu->gp_counters[i].vcpu = vcpu;
+ }
+ for (i = 0; i < X86_PMC_MAX_FIXED; i++) {
+ pmu->fixed_counters[i].type = KVM_PMC_FIXED;
+ pmu->fixed_counters[i].vcpu = vcpu;
+ }
+ kvm_pmu_cpuid_update(vcpu);
+}
+
+void kvm_pmu_reset(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ int i;
+
+ for (i = 0; i < X86_PMC_MAX_GENERIC; i++) {
+ struct kvm_pmc *pmc = &pmu->gp_counters[i];
+ stop_counter(pmc);
+ pmc->counter = pmc->eventsel = 0;
+ }
+
+ for (i = 0; i < X86_PMC_MAX_FIXED; i++)
+ stop_counter(&pmu->fixed_counters[i]);
+
+ pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
+ pmu->global_ovf_ctrl = 0;
+}
+
+void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
+{
+ kvm_pmu_reset(vcpu);
+}
+
+void kvm_handle_pmu_event(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *pmu = &vcpu->arch.pmu;
+ u64 bitmask;
+ int bit;
+
+ if (vcpu->arch.apic)
+ kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
+
+ bitmask = pmu->reprogram_pmi;
+
+ for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
+ struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);
+
+ if (unlikely(!pmc || !pmc->perf_event)) {
+ __clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
+ continue;
+ }
+
+ reprogram_idx(pmu, bit);
+ }
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b1c526..eb60363 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -623,6 +623,8 @@ static void update_cpuid(struct kvm_vcpu *vcpu)
if (apic)
apic->lapic_timer.timer_mode_mask = timer_mode_mask;
+
+ kvm_pmu_cpuid_update(vcpu);
}
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -1655,8 +1657,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
* which we perfectly emulate ;-). Any other value should be at least
* reported, some guests depend on them.
*/
- case MSR_P6_EVNTSEL0:
- case MSR_P6_EVNTSEL1:
case MSR_K7_EVNTSEL0:
case MSR_K7_EVNTSEL1:
case MSR_K7_EVNTSEL2:
@@ -1668,8 +1668,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
/* at least RHEL 4 unconditionally writes to the perfctr registers,
* so we ignore writes to make it happy.
*/
- case MSR_P6_PERFCTR0:
- case MSR_P6_PERFCTR1:
case MSR_K7_PERFCTR0:
case MSR_K7_PERFCTR1:
case MSR_K7_PERFCTR2:
@@ -1706,6 +1704,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
default:
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
+ if (kvm_pmu_msr(vcpu, msr))
+ return kvm_pmu_set_msr(vcpu, msr, data);
if (!ignore_msrs) {
pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
msr, data);
@@ -1868,10 +1868,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_K8_SYSCFG:
case MSR_K7_HWCR:
case MSR_VM_HSAVE_PA:
- case MSR_P6_PERFCTR0:
- case MSR_P6_PERFCTR1:
- case MSR_P6_EVNTSEL0:
- case MSR_P6_EVNTSEL1:
case MSR_K7_EVNTSEL0:
case MSR_K7_PERFCTR0:
case MSR_K8_INT_PENDING_MSG:
@@ -1982,6 +1978,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
data = 0xbe702111;
break;
default:
+ if (kvm_pmu_msr(vcpu, msr))
+ return kvm_pmu_get_msr(vcpu, msr, pdata);
if (!ignore_msrs) {
pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
return 1;
@@ -5730,6 +5728,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
process_nmi(vcpu);
req_immediate_exit =
kvm_check_request(KVM_REQ_IMMEDIATE_EXIT, vcpu);
+ if (kvm_check_request(KVM_REQ_PMU, vcpu))
+ kvm_handle_pmu_event(vcpu);
}
r = kvm_mmu_reload(vcpu);
@@ -6470,6 +6470,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_async_pf_hash_reset(vcpu);
vcpu->arch.apf.halted = false;
+ kvm_pmu_reset(vcpu);
+
return kvm_x86_ops->vcpu_reset(vcpu);
}
@@ -6558,6 +6560,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
goto fail_free_mce_banks;
kvm_async_pf_hash_reset(vcpu);
+ kvm_pmu_init(vcpu);
return 0;
fail_free_mce_banks:
@@ -6576,6 +6579,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
int idx;
+ kvm_pmu_destroy(vcpu);
kfree(vcpu->arch.mce_banks);
kvm_free_lapic(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c6a2ec9..7ad40d5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -52,6 +52,7 @@
#define KVM_REQ_STEAL_UPDATE 13
#define KVM_REQ_NMI 14
#define KVM_REQ_IMMEDIATE_EXIT 15
+#define KVM_REQ_PMU 16
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
--
1.7.5.3
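A note on the counter arithmetic in reprogram_counter() above: the guest
writes a value to a counter and expects an interrupt when the counter
wraps, while perf_events counts upwards and fires after sample_period
events. The conversion is .sample_period = (-pmc->counter) &
pmc_bitmask(pmc). A worked example, assuming 40-bit counters:

/*
 * pmc_bitmask(pmc) == (1ULL << 40) - 1 == 0xFFFFFFFFFF
 *
 * The guest wants a PMI after 1000 events, so it writes -1000, which
 * the 40-bit counter stores as:
 *
 *   pmc->counter = (0 - 1000) & 0xFFFFFFFFFF = 0xFFFFFFFC18
 *
 * reprogram_counter() converts this to the distance to overflow:
 *
 *   sample_period = (0 - 0xFFFFFFFC18) & 0xFFFFFFFFFF = 0x3E8 = 1000
 */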
* [PATCH 3/9] KVM: Add generic RDPMC support
2011-11-03 12:31 [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
2011-11-03 12:31 ` [PATCH 1/9] KVM: Expose kvm_lapic_local_deliver() Gleb Natapov
2011-11-03 12:31 ` [PATCH 2/9] KVM: Expose a version 2 architectural PMU to guests Gleb Natapov
@ 2011-11-03 12:31 ` Gleb Natapov
2011-11-03 12:31 ` [PATCH 4/9] KVM: SVM: Intercept RDPMC Gleb Natapov
` (6 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-03 12:31 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
From: Avi Kivity <avi@redhat.com>
Add a helper function that emulates the RDPMC instruction operation.
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/x86.c | 15 +++++++++++++++
2 files changed, 16 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 53caa94..729b3b7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -767,6 +767,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+bool kvm_rdpmc(struct kvm_vcpu *vcpu);
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eb60363..5ea4cb8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -814,6 +814,21 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
}
EXPORT_SYMBOL_GPL(kvm_get_dr);
+bool kvm_rdpmc(struct kvm_vcpu *vcpu)
+{
+ u32 ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ u64 data;
+ int err;
+
+ err = kvm_pmu_read_pmc(vcpu, ecx, &data);
+ if (err)
+ return err;
+ kvm_register_write(vcpu, VCPU_REGS_RAX, (u32)data);
+ kvm_register_write(vcpu, VCPU_REGS_RDX, data >> 32);
+ return err;
+}
+EXPORT_SYMBOL_GPL(kvm_rdpmc);
+
/*
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
--
1.7.5.3
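For context, this is the guest-side instruction being emulated: RDPMC
takes the counter index in ECX (with bit 30 selecting fixed counters and
bit 31 requesting a 32-bit "fast" read, as decoded by kvm_pmu_read_pmc()
in patch 2/9) and returns the counter in EDX:EAX. A guest would
typically wrap it like this (illustrative sketch only):

static inline u64 guest_rdpmc(u32 idx)
{
	u32 lo, hi;

	/* idx bit 30: fixed counter; bit 31: fast (32-bit) read */
	asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (idx));
	return lo | ((u64)hi << 32);
}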
* [PATCH 4/9] KVM: SVM: Intercept RDPMC
2011-11-03 12:31 [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
` (2 preceding siblings ...)
2011-11-03 12:31 ` [PATCH 3/9] KVM: Add generic RDPMC support Gleb Natapov
@ 2011-11-03 12:31 ` Gleb Natapov
2011-11-03 12:31 ` [PATCH 5/9] KVM: VMX: " Gleb Natapov
` (5 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-03 12:31 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
From: Avi Kivity <avi@redhat.com>
Intercept RDPMC and forward it to the PMU emulation code.
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/kvm/svm.c | 15 +++++++++++++++
1 files changed, 15 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e32243e..5fa553b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1014,6 +1014,7 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_NMI);
set_intercept(svm, INTERCEPT_SMI);
set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
+ set_intercept(svm, INTERCEPT_RDPMC);
set_intercept(svm, INTERCEPT_CPUID);
set_intercept(svm, INTERCEPT_INVD);
set_intercept(svm, INTERCEPT_HLT);
@@ -2770,6 +2771,19 @@ static int emulate_on_interception(struct vcpu_svm *svm)
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
+static int rdpmc_interception(struct vcpu_svm *svm)
+{
+ int err;
+
+ if (!static_cpu_has(X86_FEATURE_NRIPS))
+ return emulate_on_interception(svm);
+
+ err = kvm_rdpmc(&svm->vcpu);
+ kvm_complete_insn_gp(&svm->vcpu, err);
+
+ return 1;
+}
+
bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
{
unsigned long cr0 = svm->vcpu.arch.cr0;
@@ -3190,6 +3204,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_SMI] = nop_on_interception,
[SVM_EXIT_INIT] = nop_on_interception,
[SVM_EXIT_VINTR] = interrupt_window_interception,
+ [SVM_EXIT_RDPMC] = rdpmc_interception,
[SVM_EXIT_CPUID] = cpuid_interception,
[SVM_EXIT_IRET] = iret_interception,
[SVM_EXIT_INVD] = emulate_on_interception,
--
1.7.5.3
* [PATCH 5/9] KVM: VMX: Intercept RDPMC
2011-11-03 12:31 [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
` (3 preceding siblings ...)
2011-11-03 12:31 ` [PATCH 4/9] KVM: SVM: Intercept RDPMC Gleb Natapov
@ 2011-11-03 12:31 ` Gleb Natapov
2011-11-03 12:31 ` [PATCH 6/9] perf: expose perf capability to other modules Gleb Natapov
` (4 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-03 12:31 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
From: Avi Kivity <avi@redhat.com>
Intercept RDPMC and forward it to the PMU emulation code.
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/kvm/vmx.c | 15 ++++++++++++++-
1 files changed, 14 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6e28d58..a6535ba 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1956,6 +1956,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
#endif
CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
+ CPU_BASED_RDPMC_EXITING |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
/*
* We can allow some features even when not supported by the
@@ -2414,7 +2415,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_USE_TSC_OFFSETING |
CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING |
- CPU_BASED_INVLPG_EXITING;
+ CPU_BASED_INVLPG_EXITING |
+ CPU_BASED_RDPMC_EXITING;
if (yield_on_hlt)
min |= CPU_BASED_HLT_EXITING;
@@ -4615,6 +4617,16 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)
return 1;
}
+static int handle_rdpmc(struct kvm_vcpu *vcpu)
+{
+ int err;
+
+ err = kvm_rdpmc(vcpu);
+ kvm_complete_insn_gp(vcpu, err);
+
+ return 1;
+}
+
static int handle_wbinvd(struct kvm_vcpu *vcpu)
{
skip_emulated_instruction(vcpu);
@@ -5565,6 +5577,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_HLT] = handle_halt,
[EXIT_REASON_INVD] = handle_invd,
[EXIT_REASON_INVLPG] = handle_invlpg,
+ [EXIT_REASON_RDPMC] = handle_rdpmc,
[EXIT_REASON_VMCALL] = handle_vmcall,
[EXIT_REASON_VMCLEAR] = handle_vmclear,
[EXIT_REASON_VMLAUNCH] = handle_vmlaunch,
--
1.7.5.3
* [PATCH 6/9] perf: expose perf capability to other modules.
2011-11-03 12:31 [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
` (4 preceding siblings ...)
2011-11-03 12:31 ` [PATCH 5/9] KVM: VMX: " Gleb Natapov
@ 2011-11-03 12:31 ` Gleb Natapov
2011-11-03 12:31 ` [PATCH 7/9] KVM: Expose the architectural performance monitoring CPUID leaf Gleb Natapov
` (3 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-03 12:31 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
KVM needs to know perf capability to decide which PMU it can expose to a
guest.
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/include/asm/perf_event.h | 11 +++++++++++
arch/x86/kernel/cpu/perf_event.c | 11 +++++++++++
arch/x86/kernel/cpu/perf_event.h | 2 ++
arch/x86/kernel/cpu/perf_event_intel.c | 3 +++
4 files changed, 27 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f61c62f..7d7e57f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -201,7 +201,18 @@ struct perf_guest_switch_msr {
u64 host, guest;
};
+struct x86_pmu_capability {
+ int version;
+ int num_counters_gp;
+ int num_counters_fixed;
+ int bit_width_gp;
+ int bit_width_fixed;
+ unsigned int events_mask;
+ int events_mask_len;
+};
+
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
#else
static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6408910..94ac9ca 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1570,3 +1570,14 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
return misc;
}
+
+void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+ cap->version = x86_pmu.version;
+ cap->num_counters_gp = x86_pmu.num_counters;
+ cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+ cap->bit_width_gp = cap->bit_width_fixed = x86_pmu.cntval_bits;
+ cap->events_mask = x86_pmu.events_mask;
+ cap->events_mask_len = x86_pmu.events_mask_len;
+}
+EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d4..e9ed238 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -259,6 +259,8 @@ struct x86_pmu {
int num_counters_fixed;
int cntval_bits;
u64 cntval_mask;
+ u32 events_mask;
+ int events_mask_len;
int apic;
u64 max_period;
struct event_constraint *
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index e09ca20..64e5f35 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1580,6 +1580,8 @@ __init int intel_pmu_init(void)
x86_pmu.num_counters = eax.split.num_counters;
x86_pmu.cntval_bits = eax.split.bit_width;
x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
+ x86_pmu.events_mask = ebx;
+ x86_pmu.events_mask_len = eax.split.mask_length;
/*
* Quirk: v2 perfmon does not report fixed-purpose events, so
@@ -1651,6 +1653,7 @@ __init int intel_pmu_init(void)
* architectural event which is often completely bogus:
*/
intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+ x86_pmu.events_mask &= ~0x40;
pr_cont("erratum AAJ80 worked around, ");
}
--
1.7.5.3
* [PATCH 7/9] KVM: Expose the architectural performance monitoring CPUID leaf
2011-11-03 12:31 [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
` (5 preceding siblings ...)
2011-11-03 12:31 ` [PATCH 6/9] perf: expose perf capability to other modules Gleb Natapov
@ 2011-11-03 12:31 ` Gleb Natapov
2011-11-03 12:31 ` [PATCH 8/9] KVM: x86 emulator: fix RDPMC privilege check Gleb Natapov
` (2 subsequent siblings)
9 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-03 12:31 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
From: Avi Kivity <avi@redhat.com>
Provide a CPUID leaf that describes the emulated PMU.
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/kvm/x86.c | 23 ++++++++++++++++++++++-
1 files changed, 22 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5ea4cb8..56153a9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2543,6 +2543,28 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
case 9:
break;
+ case 0xa: { /* Architectural Performance Monitoring */
+ struct x86_pmu_capability cap;
+
+ perf_get_x86_pmu_capability(&cap);
+
+ /*
+ * Only support guest architectural pmu on a host
+ * with architectural pmu.
+ */
+ if (!cap.version)
+ memset(&cap, 0, sizeof(cap));
+
+ entry->eax = min(cap.version, 2)
+ | (cap.num_counters_gp << 8)
+ | (cap.bit_width_gp << 16)
+ | (cap.events_mask_len << 24);
+ entry->ebx = cap.events_mask;
+ entry->ecx = 0;
+ entry->edx = cap.num_counters_fixed
+ | (cap.bit_width_fixed << 5);
+ break;
+ }
/* function 0xb has additional index. */
case 0xb: {
int i, level_type;
@@ -2637,7 +2659,6 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case 3: /* Processor serial number */
case 5: /* MONITOR/MWAIT */
case 6: /* Thermal management */
- case 0xA: /* Architectural Performance Monitoring */
case 0x80000007: /* Advanced power management */
case 0xC0000002:
case 0xC0000003:
--
1.7.5.3
^ permalink raw reply related [flat|nested] 22+ messages in thread
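[To see what a guest observes once this leaf is populated, here is a
hedged user-space sketch that decodes leaf 0xA exactly as do_cpuid_ent()
packs it above. __get_cpuid() is GCC's cpuid.h helper; per the SDM, a set
bit in EBX marks the corresponding architectural event as unavailable.]

	#include <stdio.h>
	#include <cpuid.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx)) {
			fprintf(stderr, "CPUID leaf 0xA not supported\n");
			return 1;
		}

		printf("PMU version:          %u\n", eax & 0xff);
		printf("GP counters:          %u\n", (eax >> 8) & 0xff);
		printf("GP counter width:     %u\n", (eax >> 16) & 0xff);
		printf("event mask length:    %u\n", (eax >> 24) & 0xff);
		printf("unavailable events:   0x%x\n", ebx);
		printf("fixed counters:       %u\n", edx & 0x1f);
		printf("fixed counter width:  %u\n", (edx >> 5) & 0xff);
		return 0;
	}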
* [PATCH 8/9] KVM: x86 emulator: fix RDPMC privilege check
2011-11-03 12:31 [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
` (6 preceding siblings ...)
2011-11-03 12:31 ` [PATCH 7/9] KVM: Expose the architectural performance monitoring CPUID leaf Gleb Natapov
@ 2011-11-03 12:31 ` Gleb Natapov
2011-11-03 12:31 ` [PATCH 9/9] KVM: x86 emulator: implement RDPMC (0F 33) Gleb Natapov
2011-11-03 12:40 ` [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
9 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-03 12:31 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
From: Avi Kivity <avi@redhat.com>
RDPMC is only privileged if CR4.PCE=0. check_rdpmc() already implements this,
so all we need to do is drop the Priv flag.
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/kvm/emulate.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8547958..c0ee85b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3254,7 +3254,7 @@ static struct opcode twobyte_table[256] = {
DI(ImplicitOps | Priv, wrmsr),
IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
DI(ImplicitOps | Priv, rdmsr),
- DIP(ImplicitOps | Priv, rdpmc, check_rdpmc),
+ DIP(ImplicitOps, rdpmc, check_rdpmc),
I(ImplicitOps | VendorSpecific, em_sysenter),
I(ImplicitOps | Priv | VendorSpecific, em_sysexit),
N, N,
--
1.7.5.3
^ permalink raw reply related [flat|nested] 22+ messages in thread
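[For reference, the privilege rule the patch leans on: RDPMC raises #GP
in user mode unless CR4.PCE is set. A paraphrased sketch of what
check_rdpmc() does, not the verbatim source:]

	static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
	{
		ulong cr4 = ctxt->ops->get_cr(ctxt, 4);

		/* Only privileged while CR4.PCE=0: fault outside ring 0. */
		if (!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt))
			return emulate_gp(ctxt, 0);

		return X86EMUL_CONTINUE;
	}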
* [PATCH 9/9] KVM: x86 emulator: implement RDPMC (0F 33)
2011-11-03 12:31 [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
` (7 preceding siblings ...)
2011-11-03 12:31 ` [PATCH 8/9] KVM: x86 emulator: fix RDPMC privilege check Gleb Natapov
@ 2011-11-03 12:31 ` Gleb Natapov
2011-11-03 12:40 ` [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
9 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-03 12:31 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
From: Avi Kivity <avi@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/include/asm/kvm_emulate.h | 1 +
arch/x86/kvm/emulate.c | 13 ++++++++++++-
arch/x86/kvm/x86.c | 7 +++++++
3 files changed, 20 insertions(+), 1 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 9a4acf4..ab4092e 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -181,6 +181,7 @@ struct x86_emulate_ops {
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
+ int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
void (*halt)(struct x86_emulate_ctxt *ctxt);
void (*wbinvd)(struct x86_emulate_ctxt *ctxt);
int (*fix_hypercall)(struct x86_emulate_ctxt *ctxt);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c0ee85b..d76a852 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2623,6 +2623,17 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
+{
+ u64 pmc;
+
+ if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc))
+ return emulate_gp(ctxt, 0);
+ ctxt->regs[VCPU_REGS_RAX] = (u32)pmc;
+ ctxt->regs[VCPU_REGS_RDX] = pmc >> 32;
+ return X86EMUL_CONTINUE;
+}
+
static int em_mov(struct x86_emulate_ctxt *ctxt)
{
ctxt->dst.val = ctxt->src.val;
@@ -3254,7 +3265,7 @@ static struct opcode twobyte_table[256] = {
DI(ImplicitOps | Priv, wrmsr),
IIP(ImplicitOps, em_rdtsc, rdtsc, check_rdtsc),
DI(ImplicitOps | Priv, rdmsr),
- DIP(ImplicitOps, rdpmc, check_rdpmc),
+ IIP(ImplicitOps, em_rdpmc, rdpmc, check_rdpmc),
I(ImplicitOps | VendorSpecific, em_sysenter),
I(ImplicitOps | Priv | VendorSpecific, em_sysexit),
N, N,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 56153a9..b1bd52f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4654,6 +4654,12 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
}
+static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
+ u32 pmc, u64 *pdata)
+{
+ return kvm_pmu_read_pmc(emul_to_vcpu(ctxt), pmc, pdata);
+}
+
static void emulator_halt(struct x86_emulate_ctxt *ctxt)
{
emul_to_vcpu(ctxt)->arch.halt_request = 1;
@@ -4706,6 +4712,7 @@ static struct x86_emulate_ops emulate_ops = {
.set_dr = emulator_set_dr,
.set_msr = emulator_set_msr,
.get_msr = emulator_get_msr,
+ .read_pmc = emulator_read_pmc,
.halt = emulator_halt,
.wbinvd = emulator_wbinvd,
.fix_hypercall = emulator_fix_hypercall,
--
1.7.5.3
^ permalink raw reply related [flat|nested] 22+ messages in thread
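[For completeness, the guest-side view of the instruction being emulated:
RDPMC takes the counter index in ECX and returns the value split across
EDX:EAX, which is exactly what em_rdpmc() reconstructs above. An
illustrative inline-assembly sketch:]

	static inline unsigned long long rdpmc_read(unsigned int counter)
	{
		unsigned int lo, hi;

		/* Counter index in ECX; low half in EAX, high half in EDX. */
		asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
		return ((unsigned long long)hi << 32) | lo;
	}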
* Re: [PATCH 0/9] KVM in-guest performance monitoring
2011-11-03 12:31 [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
` (8 preceding siblings ...)
2011-11-03 12:31 ` [PATCH 9/9] KVM: x86 emulator: implement RDPMC (0F 33) Gleb Natapov
@ 2011-11-03 12:40 ` Gleb Natapov
9 siblings, 0 replies; 22+ messages in thread
From: Gleb Natapov @ 2011-11-03 12:40 UTC (permalink / raw)
To: kvm; +Cc: avi, mtosatti, linux-kernel, mingo, a.p.zijlstra, acme
Please disregard this one. I sent an old version of the patch series
along with the new one by mistake. Please look at the PATCHv2 version
of the patch series. Sorry about that :(
On Thu, Nov 03, 2011 at 02:31:29PM +0200, Gleb Natapov wrote:
> This patchset exposes an emulated version 2 architectural performance
> monitoring unit to KVM guests. The PMU is emulated using perf_events,
> so the host kernel can multiplex host-wide, host-user, and guest
> events on the available resources.
>
> The patches should be applied on top of KVM patches from the patch series
> "[PATCH v2 0/9] perf support for x86 guest/host-only bits" [1]
>
> If you want to try running perf in a guest you need to apply the patch
> below to qemu-kvm and use -cpu host on qemu command line. But DO NOT
> TRY those patches without applying [2][3] to the host kernel first.
> Don't tell me I didn't warn you!
>
> [1] https://lkml.org/lkml/2011/10/5/153
> [2] https://lkml.org/lkml/2011/10/18/390
> [3] https://lkml.org/lkml/2011/10/23/163
>
> Avi Kivity (8):
> KVM: Expose kvm_lapic_local_deliver()
> KVM: Expose a version 2 architectural PMU to guests
> KVM: Add generic RDPMC support
> KVM: SVM: Intercept RDPMC
> KVM: VMX: Intercept RDPMC
> KVM: Expose the architectural performance monitoring CPUID leaf
> KVM: x86 emulator: fix RDPMC privilege check
> KVM: x86 emulator: implement RDPMC (0F 33)
>
> Gleb Natapov (1):
> perf: expose perf capability to other modules.
>
> arch/x86/include/asm/kvm_emulate.h | 1 +
> arch/x86/include/asm/kvm_host.h | 44 +++
> arch/x86/include/asm/perf_event.h | 11 +
> arch/x86/kernel/cpu/perf_event.c | 11 +
> arch/x86/kernel/cpu/perf_event.h | 2 +
> arch/x86/kernel/cpu/perf_event_intel.c | 3 +
> arch/x86/kvm/Makefile | 2 +-
> arch/x86/kvm/emulate.c | 13 +-
> arch/x86/kvm/lapic.c | 2 +-
> arch/x86/kvm/lapic.h | 1 +
> arch/x86/kvm/pmu.c | 513 ++++++++++++++++++++++++++++++++
> arch/x86/kvm/svm.c | 15 +
> arch/x86/kvm/vmx.c | 15 +-
> arch/x86/kvm/x86.c | 65 ++++-
> include/linux/kvm_host.h | 1 +
> 15 files changed, 686 insertions(+), 13 deletions(-)
> create mode 100644 arch/x86/kvm/pmu.c
>
> diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
> index f179999..ff2a0ca 100644
> --- a/target-i386/cpuid.c
> +++ b/target-i386/cpuid.c
> @@ -1178,11 +1178,20 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
> *edx = 0;
> break;
> case 0xA:
> - /* Architectural Performance Monitoring Leaf */
> - *eax = 0;
> - *ebx = 0;
> - *ecx = 0;
> - *edx = 0;
> + if (kvm_enabled()) {
> + KVMState *s = env->kvm_state;
> +
> + *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX);
> + *ebx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EBX);
> + *ecx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_ECX);
> + *edx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EDX);
> + } else {
> + /* Architectural Performance Monitoring Leaf */
> + *eax = 0; //0x07280402;
> + *ebx = 0;
> + *ecx = 0;
> + *edx = 0; //0x00000503;
> + }
> break;
> case 0xD:
> /* Processor Extended State */
> --
> 1.7.5.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
Gleb.
^ permalink raw reply [flat|nested] 22+ messages in thread
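[A hedged usage note on the cover letter's instructions; the binary
name, memory size, and disk image below are illustrative, and the guest
workload is an arbitrary example:]

	qemu-system-x86_64 -enable-kvm -cpu host -m 1024 guest-disk.img

[Inside the guest, ordinary perf commands should then see the emulated
PMU, e.g.:]

	perf stat -e cycles,instructions,branches sleep 1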
* Re: [PATCH 6/9] perf: expose perf capability to other modules.
2011-11-02 7:42 ` Frederic Weisbecker
@ 2011-11-07 14:45 ` Will Deacon
2011-11-10 8:58 ` Frederic Weisbecker
0 siblings, 1 reply; 22+ messages in thread
From: Will Deacon @ 2011-11-07 14:45 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: David Ahern, Gleb Natapov, kvm@vger.kernel.org, avi@redhat.com,
mtosatti@redhat.com, linux-kernel@redhat.com, mingo@elte.hu,
a.p.zijlstra@chello.nl, acme@ghostprotocols.net
Hi Frederic,
On Wed, Nov 02, 2011 at 07:42:04AM +0000, Frederic Weisbecker wrote:
> On Tue, Nov 01, 2011 at 10:20:04AM -0600, David Ahern wrote:
> > Right. Originally it could be enabled/disabled. Right now it cannot be,
> > but I believe Frederic is working on making it configurable again.
> >
> > David
>
> Yep. Will Deacon is working on making the breakpoints able to process
> pure arch information (i.e. without being forced to use the perf attr
> as a midlayer to define them).
>
> Once we have that I can separate the breakpoints implementation from perf
> and make it optional.
How do you foresee kdb fitting into this? I see that currently [on x86] we
cook up perf_event structures with a specific overflow handler set. If we
want to move this over to using a completely arch-defined structure, then
we're going to end up with an overflow handler field in both perf_event
*and* the arch-specific structure, which doesn't feel right to me.
Of course, if the goal is only to separate ptrace (i.e. user debugging) from
the perf dependency then we don't need the overflow handler because we'll
always just send SIGTRAP to the current task.
Any ideas?
Will
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 6/9] perf: expose perf capability to other modules.
2011-11-07 14:45 ` Will Deacon
@ 2011-11-10 8:58 ` Frederic Weisbecker
2011-11-10 12:12 ` Jason Wessel
0 siblings, 1 reply; 22+ messages in thread
From: Frederic Weisbecker @ 2011-11-10 8:58 UTC (permalink / raw)
To: Will Deacon
Cc: David Ahern, Gleb Natapov, kvm@vger.kernel.org, avi@redhat.com,
mtosatti@redhat.com, linux-kernel@redhat.com, mingo@elte.hu,
a.p.zijlstra@chello.nl, acme@ghostprotocols.net, Jason Wessel
On Mon, Nov 07, 2011 at 02:45:17PM +0000, Will Deacon wrote:
> Hi Frederic,
>
> On Wed, Nov 02, 2011 at 07:42:04AM +0000, Frederic Weisbecker wrote:
> > On Tue, Nov 01, 2011 at 10:20:04AM -0600, David Ahern wrote:
> > > Right. Originally it could be enabled/disabled. Right now it cannot be,
> > > but I believe Frederic is working on making it configurable again.
> > >
> > > David
> >
> > Yep. Will Deacon is working on making the breakpoints able to process
> > pure arch information (i.e. without being forced to use the perf attr
> > as a midlayer to define them).
> >
> > Once we have that I can separate the breakpoints implementation from perf
> > and make it optional.
>
> How do you foresee kdb fitting into this? I see that currently [on x86] we
> cook up perf_event structures with a specific overflow handler set. If we
> want to move this over to using a completely arch-defined structure, then
> we're going to end up with an overflow handler field in both perf_event
> *and* the arch-specific structure, which doesn't feel right to me.
>
> Of course, if the goal is only to separate ptrace (i.e. user debugging) from
> the perf dependency then we don't need the overflow handler because we'll
> always just send SIGTRAP to the current task.
>
> Any ideas?
I don't know if we want to convert x86/kgdb to use pure arch breakpoints.
If kgdb one day wants to extend this use to generic code, it may be a good
idea to keep things as they are. I don't know, I'm adding Jason in Cc.
In any case I think we have a problem if we want to default to sending a
SIGTRAP. Look at this:
	bp = per_cpu(bp_per_reg[i], cpu);
	/*
	 * Reset the 'i'th TRAP bit in dr6 to denote completion of
	 * exception handling
	 */
	(*dr6_p) &= ~(DR_TRAP0 << i);
	/*
	 * bp can be NULL due to lazy debug register switching
	 * or due to concurrent perf counter removing.
	 */
	if (!bp) {
		rcu_read_unlock();
		break;
	}

	perf_bp_event(bp, args->regs);
I don't have the details about how lazy the debug register switching
can be. We also want to avoid locking between the perf event scheduling
(removal) path and the breakpoint triggering path.
A solution is to look at the ptrace breakpoints in the thread struct
and check whether the one at that index is present. That check could
live in its own callback or act as a fallback in hw_breakpoint_handler().
I don't feel strongly about choosing either of those solutions.
^ permalink raw reply [flat|nested] 22+ messages in thread
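[A hypothetical sketch of the fallback Frederic describes, replacing the
!bp branch quoted above. ptrace_bps is x86's per-thread breakpoint array;
whether the fallback should deliver SIGTRAP itself is an assumption here,
not something the thread settled:]

	if (!bp) {
		struct perf_event *tbp = current->thread.ptrace_bps[i];

		/* Per-CPU slot empty: fall back to the thread's own bp. */
		if (tbp)
			send_sig_info(SIGTRAP, SEND_SIG_PRIV, current);
		rcu_read_unlock();
		break;
	}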
* Re: [PATCH 6/9] perf: expose perf capability to other modules.
2011-11-10 8:58 ` Frederic Weisbecker
@ 2011-11-10 12:12 ` Jason Wessel
2011-11-15 18:34 ` Frederic Weisbecker
0 siblings, 1 reply; 22+ messages in thread
From: Jason Wessel @ 2011-11-10 12:12 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Will Deacon, David Ahern, Gleb Natapov, kvm@vger.kernel.org,
avi@redhat.com, mtosatti@redhat.com, linux-kernel@redhat.com,
mingo@elte.hu, a.p.zijlstra@chello.nl, acme@ghostprotocols.net
On 11/10/2011 02:58 AM, Frederic Weisbecker wrote:
> On Mon, Nov 07, 2011 at 02:45:17PM +0000, Will Deacon wrote:
>> Hi Frederic,
>>
>> On Wed, Nov 02, 2011 at 07:42:04AM +0000, Frederic Weisbecker wrote:
>>> On Tue, Nov 01, 2011 at 10:20:04AM -0600, David Ahern wrote:
>>>> Right. Originally it could be enabled/disabled. Right now it cannot be,
>>>> but I believe Frederic is working on making it configurable again.
>>>>
>>>> David
>>> Yep. Will Deacon is working on making the breakpoints able to process
>>> pure arch information (i.e. without being forced to use the perf attr
>>> as a midlayer to define them).
>>>
>>> Once we have that I can separate the breakpoints implementation from perf
>>> and make it optional.
>> How do you foresee kdb fitting into this? I see that currently [on x86] we
>> cook up perf_event structures with a specific overflow handler set. If we
>> want to move this over to using a completely arch-defined structure, then
>> we're going to end up with an overflow handler field in both perf_event
>> *and* the arch-specific structure, which doesn't feel right to me.
>>
>> Of course, if the goal is only to separate ptrace (i.e. user debugging) from
>> the perf dependency then we don't need the overflow handler because we'll
>> always just send SIGTRAP to the current task.
>>
>> Any ideas?
> I don't know if we want to convert x86/kgdb to use pure arch breakpoints.
> If kgdb one day wants to extend this use to generic code, it may be a good
> idea to keep things as they are. I don't know, I'm adding Jason in Cc.
I think the important part is to share the allocation code (meaning who
owns which breakpoint slots). This is why kgdb/kdb allocates the perf
structures. The kgdb code will also directly write data to the slots once
it has reserved them. It would be good to share that code as well, but it
was not shared because it was not usable early enough in the boot cycle
on x86.
Certainly there are others who could consume the same infrastructure such as kprobes.
Jason.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 6/9] perf: expose perf capability to other modules.
2011-11-10 12:12 ` Jason Wessel
@ 2011-11-15 18:34 ` Frederic Weisbecker
0 siblings, 0 replies; 22+ messages in thread
From: Frederic Weisbecker @ 2011-11-15 18:34 UTC (permalink / raw)
To: Jason Wessel
Cc: Will Deacon, David Ahern, Gleb Natapov, kvm@vger.kernel.org,
avi@redhat.com, mtosatti@redhat.com, linux-kernel@redhat.com,
mingo@elte.hu, a.p.zijlstra@chello.nl, acme@ghostprotocols.net
On Thu, Nov 10, 2011 at 06:12:23AM -0600, Jason Wessel wrote:
> On 11/10/2011 02:58 AM, Frederic Weisbecker wrote:
> > On Mon, Nov 07, 2011 at 02:45:17PM +0000, Will Deacon wrote:
> >> Hi Frederic,
> >>
> >> On Wed, Nov 02, 2011 at 07:42:04AM +0000, Frederic Weisbecker wrote:
> >>> On Tue, Nov 01, 2011 at 10:20:04AM -0600, David Ahern wrote:
> >>>> Right. Originally it could be enabled/disabled. Right now it cannot be,
> >>>> but I believe Frederic is working on making it configurable again.
> >>>>
> >>>> David
> >>> Yep. Will Deacon is working on making the breakpoints able to process
> >>> pure arch information (i.e. without being forced to use the perf attr
> >>> as a midlayer to define them).
> >>>
> >>> Once we have that I can separate the breakpoints implementation from perf
> >>> and make it optional.
> >> How do you foresee kdb fitting into this? I see that currently [on x86] we
> >> cook up perf_event structures with a specific overflow handler set. If we
> >> want to move this over to using a completely arch-defined structure, then
> >> we're going to end up with an overflow handler field in both perf_event
> >> *and* the arch-specific structure, which doesn't feel right to me.
> >>
> >> Of course, if the goal is only to separate ptrace (i.e. user debugging) from
> >> the perf dependency then we don't need the overflow handler because we'll
> >> always just send SIGTRAP to the current task.
> >>
> >> Any ideas?
> > I don't know if we want to convert x86/kgdb to use pure arch breakpoints.
> > If kgdb one day wants to extend this use to generic code, it may be a good
> > idea to keep things as they are. I don't know, I'm adding Jason in Cc.
>
> I think the important part is to share the allocation code (meaning who
> owns which breakpoint slots). This is why kgdb/kdb allocates the perf
> structures. The kgdb code will also directly write data to the slots once
> it has reserved them. It would be good to share that code as well, but it
> was not shared because it was not usable early enough in the boot cycle
> on x86.
>
> Certainly there are others who could consume the same infrastructure such as kprobes.
>
> Jason.
Yeah sure, in any case we want to keep the slot allocation/reservation
handled in kernel/events/hw_breakpoint.c.
^ permalink raw reply [flat|nested] 22+ messages in thread
end of thread, other threads:[~2011-11-15 18:34 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-11-03 12:31 [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
2011-11-03 12:31 ` [PATCH 1/9] KVM: Expose kvm_lapic_local_deliver() Gleb Natapov
2011-11-03 12:31 ` [PATCH 2/9] KVM: Expose a version 2 architectural PMU to guests Gleb Natapov
2011-11-03 12:31 ` [PATCH 3/9] KVM: Add generic RDPMC support Gleb Natapov
2011-11-03 12:31 ` [PATCH 4/9] KVM: SVM: Intercept RDPMC Gleb Natapov
2011-11-03 12:31 ` [PATCH 5/9] KVM: VMX: " Gleb Natapov
2011-11-03 12:31 ` [PATCH 6/9] perf: expose perf capability to other modules Gleb Natapov
2011-11-03 12:31 ` [PATCH 7/9] KVM: Expose the architectural performance monitoring CPUID leaf Gleb Natapov
2011-11-03 12:31 ` [PATCH 8/9] KVM: x86 emulator: fix RDPMC privilege check Gleb Natapov
2011-11-03 12:31 ` [PATCH 9/9] KVM: x86 emulator: implement RDPMC (0F 33) Gleb Natapov
2011-11-03 12:40 ` [PATCH 0/9] KVM in-guest performance monitoring Gleb Natapov
-- strict thread matches above, loose matches on Subject: below --
2011-10-30 16:53 Gleb Natapov
2011-10-30 16:53 ` [PATCH 6/9] perf: expose perf capability to other modules Gleb Natapov
2011-11-01 10:49 ` Avi Kivity
2011-11-01 15:49 ` David Ahern
2011-11-01 16:13 ` Gleb Natapov
2011-11-01 16:20 ` David Ahern
2011-11-01 16:41 ` Gleb Natapov
2011-11-02 7:42 ` Frederic Weisbecker
2011-11-07 14:45 ` Will Deacon
2011-11-10 8:58 ` Frederic Weisbecker
2011-11-10 12:12 ` Jason Wessel
2011-11-15 18:34 ` Frederic Weisbecker