[PATCH V2 2/5] ara virt interface of perf to support kvm guest os statistics collection in guest os

linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH V2 2/5] ara virt interface of perf to support kvm guest os statistics collection in guest os
@ 2010-06-21  9:31 Zhang, Yanmin
  2010-06-21 12:00 ` Avi Kivity
  0 siblings, 1 reply; 6+ messages in thread
From: Zhang, Yanmin @ 2010-06-21  9:31 UTC (permalink / raw)
  To: LKML, kvm, Avi Kivity
  Cc: Ingo Molnar, Fr??d??ric Weisbecker, Arnaldo Carvalho de Melo,
	Cyrill Gorcunov, Lin Ming, Sheng Yang, Marcelo Tosatti,
	oerg Roedel, Jes Sorensen, Gleb Natapov, Zachary Amsden,
	zhiteng.huang, tim.c.chen

The 2nd patch is to change the definition of perf_event to facilitate
perf attr copy when a hypercall happens.

Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>

---

--- linux-2.6_tip0620/include/linux/perf_event.h	2010-06-21 15:19:52.821999849 +0800
+++ linux-2.6_tip0620perfkvm/include/linux/perf_event.h	2010-06-21 16:53:49.283999849 +0800
@@ -188,7 +188,10 @@ struct perf_event_attr {
 	__u64			sample_type;
 	__u64			read_format;
 
-	__u64			disabled       :  1, /* off by default        */
+	union {
+		__u64		flags;
+		struct {
+			__u64	disabled       :  1, /* off by default        */
 				inherit	       :  1, /* children inherit it   */
 				pinned	       :  1, /* must always be on PMU */
 				exclusive      :  1, /* only group on PMU     */
@@ -217,6 +220,8 @@ struct perf_event_attr {
 				mmap_data      :  1, /* non-exec mmap data    */
 
 				__reserved_1   : 46;
+		};
+	};
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -465,12 +470,6 @@ enum perf_callchain_context {
 # include <asm/local64.h>
 #endif
 
-struct perf_guest_info_callbacks {
-	int (*is_in_guest) (void);
-	int (*is_user_mode) (void);
-	unsigned long (*get_guest_ip) (void);
-};
-
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #include <asm/hw_breakpoint.h>
 #endif
@@ -753,6 +752,20 @@ struct perf_event {
 
 	perf_overflow_handler_t		overflow_handler;
 
+	/*
+	 * pointers used by kvm perf paravirt interface.
+	 *
+	 * 1) Used in host kernel and points to host_perf_shadow which
+	 * has information about guest perf_event
+	 */
+	void				*host_perf_shadow;
+	/*
+	 * 2) Used in guest kernel and points to guest_perf_shadow which
+	 * is used as a communication area with host kernel. Host kernel
+	 * copies overflow data to it when an event overflows.
+	 */
+	void				*guest_perf_shadow;
+
 #ifdef CONFIG_EVENT_TRACING
 	struct ftrace_event_call	*tp_event;
 	struct event_filter		*filter;
@@ -838,6 +851,16 @@ struct perf_output_handle {
 	int				sample;
 };
 
+struct perf_guest_info_callbacks {
+	/* Support collect guest statistics from host side */
+	int (*is_in_guest) (void);
+	int (*is_user_mode) (void);
+	unsigned long (*get_guest_ip) (void);
+
+	/* Support paravirt interface */
+	void (*copy_event_to_shadow) (struct perf_event *event, int overflows);
+};
+
 #ifdef CONFIG_PERF_EVENTS
 
 /*
@@ -871,6 +894,10 @@ perf_event_create_kernel_counter(struct 
 				perf_overflow_handler_t callback);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
+extern void perf_event_output(struct perf_event *event, int nmi,
+		struct perf_sample_data *data, struct pt_regs *regs);
+void perf_event_attach(struct perf_event *event);
+void perf_event_detach(struct perf_event *event);
 
 struct perf_sample_data {
 	u64				type;
@@ -1023,6 +1050,14 @@ perf_event_task_sched_in(struct task_str
 static inline void
 perf_event_task_sched_out(struct task_struct *task,
 			    struct task_struct *next)			{ }
+
+static inline void
+perf_event_output(struct perf_event *event, int nmi,
+		struct perf_sample_data *data, struct pt_regs *regs)	{ }
+
+static inline void perf_event_attach(struct perf_event *event)		{ }
+static inline void perf_event_detach(struct perf_event *event)		{ }
+
 static inline void
 perf_event_task_tick(struct task_struct *task)				{ }
 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
--- linux-2.6_tip0620/kernel/watchdog.c	2010-06-21 15:20:48.517999849 +0800
+++ linux-2.6_tip0620perfkvm/kernel/watchdog.c	2010-06-21 15:21:39.315999849 +0800
@@ -197,8 +197,6 @@ static struct perf_event_attr wd_hw_attr
 	.type		= PERF_TYPE_HARDWARE,
 	.config		= PERF_COUNT_HW_CPU_CYCLES,
 	.size		= sizeof(struct perf_event_attr),
-	.pinned		= 1,
-	.disabled	= 1,
 };
 
 /* Callback function for perf event subsystem */
@@ -361,6 +359,8 @@ static int watchdog_nmi_enable(int cpu)
 	/* Try to register using hardware perf events */
 	wd_attr = &wd_hw_attr;
 	wd_attr->sample_period = hw_nmi_get_sample_period();
+	wd_attr->pinned = 1;
+	wd_attr->disabled = 1;
 	event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback);
 	if (!IS_ERR(event)) {
 		printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
--- linux-2.6_tip0620/kernel/perf_event.c	2010-06-21 15:20:49.013999849 +0800
+++ linux-2.6_tip0620perfkvm/kernel/perf_event.c	2010-06-21 16:52:35.432999849 +0800
@@ -32,6 +32,7 @@
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/kvm_para.h>
 
 #include <asm/irq_regs.h>
 
@@ -747,6 +748,7 @@ static int group_can_go_on(struct perf_e
 	 */
 	if (event->attr.exclusive && cpuctx->active_oncpu)
 		return 0;
+
 	/*
 	 * Otherwise, try to add it if all previous groups were able
 	 * to go on.
@@ -1613,6 +1615,7 @@ void perf_event_task_tick(struct task_st
 	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
 	int rotate = 0;
+	int adjust_freq = 1;
 
 	if (!atomic_read(&nr_events))
 		return;
@@ -1626,9 +1629,22 @@ void perf_event_task_tick(struct task_st
 	if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active)
 		rotate = 1;
 
-	perf_ctx_adjust_freq(&cpuctx->ctx);
-	if (ctx)
-		perf_ctx_adjust_freq(ctx);
+#ifdef CONFIG_KVM_PERF
+	if (kvm_para_available()) {
+		/*
+		 * perf_ctx_adjust_freq causes lots of pmu->read which would
+		 * trigger too many vmexit to host kernel. We disable it
+		 * under para virt situation
+		 */
+		adjust_freq = 0;
+	}
+#endif
+
+	if (adjust_freq) {
+		perf_ctx_adjust_freq(&cpuctx->ctx);
+		if (ctx)
+			perf_ctx_adjust_freq(ctx);
+	}
 
 	if (!rotate)
 		return;
@@ -3434,7 +3450,7 @@ void perf_prepare_sample(struct perf_eve
 	}
 }
 
-static void perf_event_output(struct perf_event *event, int nmi,
+void perf_event_output(struct perf_event *event, int nmi,
 				struct perf_sample_data *data,
 				struct pt_regs *regs)
 {
@@ -5261,6 +5277,47 @@ perf_event_create_kernel_counter(struct 
 }
 EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
 
+void perf_event_attach(struct perf_event *event)
+{
+	struct perf_event_context *old_ctx, *new_ctx;
+
+	old_ctx = event->ctx;
+	new_ctx = find_get_context(current->pid, -1);
+	if (old_ctx != new_ctx) {
+		if (old_ctx) {
+			/* Delete from old ctx before joining new ctx */
+			mutex_lock(&old_ctx->mutex);
+			raw_spin_lock(&old_ctx->lock);
+			list_del_event(event, old_ctx);
+			raw_spin_unlock(&old_ctx->lock);
+			mutex_unlock(&old_ctx->mutex);
+			put_ctx(old_ctx);
+		}
+
+		mutex_lock(&new_ctx->mutex);
+		raw_spin_lock(&new_ctx->lock);
+		list_add_event(event, new_ctx);
+		event->ctx = new_ctx;
+		raw_spin_unlock(&new_ctx->lock);
+		mutex_unlock(&new_ctx->mutex);
+	} else
+		put_ctx(new_ctx);
+
+	perf_event_enable(event);
+}
+EXPORT_SYMBOL_GPL(perf_event_attach);
+
+void perf_event_detach(struct perf_event *event)
+{
+	/*
+	 * Just disable the event and don't del it from
+	 * ctx->event_list in case there is a race condition
+	 * with perf_event_read_value
+	 */
+	perf_event_disable(event);
+}
+EXPORT_SYMBOL_GPL(perf_event_detach);
+
 /*
  * inherit a event from parent task to child task:
  */



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V2 2/5] ara virt interface of perf to support kvm guest os statistics collection in guest os
  2010-06-21  9:31 [PATCH V2 2/5] ara virt interface of perf to support kvm guest os statistics collection in guest os Zhang, Yanmin
@ 2010-06-21 12:00 ` Avi Kivity
  2010-06-22  2:08   ` Zhang, Yanmin
  0 siblings, 1 reply; 6+ messages in thread
From: Avi Kivity @ 2010-06-21 12:00 UTC (permalink / raw)
  To: Zhang, Yanmin
  Cc: LKML, kvm, Ingo Molnar, Fr??d??ric Weisbecker,
	Arnaldo Carvalho de Melo, Cyrill Gorcunov, Lin Ming, Sheng Yang,
	Marcelo Tosatti, oerg Roedel, Jes Sorensen, Gleb Natapov,
	Zachary Amsden, zhiteng.huang, tim.c.chen

On 06/21/2010 12:31 PM, Zhang, Yanmin wrote:
> The 2nd patch is to change the definition of perf_event to facilitate
> perf attr copy when a hypercall happens.
>
> Signed-off-by: Zhang Yanmin<yanmin_zhang@linux.intel.com>
>
> ---
>
> --- linux-2.6_tip0620/include/linux/perf_event.h	2010-06-21 15:19:52.821999849 +0800
> +++ linux-2.6_tip0620perfkvm/include/linux/perf_event.h	2010-06-21 16:53:49.283999849 +0800
> @@ -188,7 +188,10 @@ struct perf_event_attr {
>   	__u64			sample_type;
>   	__u64			read_format;
>
>    

Assuming these flags are available to the guest?

> -	__u64			disabled       :  1, /* off by default        */
> +	union {
> +		__u64		flags;
> +		struct {
> +			__u64	disabled       :  1, /* off by default        */
>   				inherit	       :  1, /* children inherit it   */
>    

inherit is meaningless for a guest.

>   				pinned	       :  1, /* must always be on PMU */
>    

We cannot allow a guest to pin a counter.

The other flags are also problematic.  I'd like to see virt-specific 
flags (probably we'll only need kernel/user and nested_hv for nested 
virtualization).

Something that is worrying is that we don't expose group information.  
perf will multiplex the events for us, but there will be a loss in accuracy.

>   #ifdef CONFIG_HAVE_HW_BREAKPOINT
>   #include<asm/hw_breakpoint.h>
>   #endif
> @@ -753,6 +752,20 @@ struct perf_event {
>
>   	perf_overflow_handler_t		overflow_handler;
>
> +	/*
> +	 * pointers used by kvm perf paravirt interface.
> +	 *
> +	 * 1) Used in host kernel and points to host_perf_shadow which
> +	 * has information about guest perf_event
> +	 */
> +	void				*host_perf_shadow;
>    

Can we have real types instead of void pointers?

> +	/*
> +	 * 2) Used in guest kernel and points to guest_perf_shadow which
> +	 * is used as a communication area with host kernel. Host kernel
> +	 * copies overflow data to it when an event overflows.
> +	 */
> +	void				*guest_perf_shadow;
>    

It's strange to see both guest and host parts in the same patch.  
Splitting to separate patches will really help review.

> @@ -1626,9 +1629,22 @@ void perf_event_task_tick(struct task_st
>   	if (ctx&&  ctx->nr_events&&  ctx->nr_events != ctx->nr_active)
>   		rotate = 1;
>
> -	perf_ctx_adjust_freq(&cpuctx->ctx);
> -	if (ctx)
> -		perf_ctx_adjust_freq(ctx);
> +#ifdef CONFIG_KVM_PERF
> +	if (kvm_para_available()) {
> +		/*
> +		 * perf_ctx_adjust_freq causes lots of pmu->read which would
> +		 * trigger too many vmexit to host kernel. We disable it
> +		 * under para virt situation
> +		 */
> +		adjust_freq = 0;
> +	}
> +#endif
>    

Perhaps we can have a batch read interface which will read many counters 
at once.  This would reduce the number of exits.  Also adjust the 
frequency less frequently.

> +
> +	if (adjust_freq) {
> +		perf_ctx_adjust_freq(&cpuctx->ctx);
> +		if (ctx)
> +			perf_ctx_adjust_freq(ctx);
> +	}
>
>    

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V2 2/5] ara virt interface of perf to support kvm guest os statistics collection in guest os
  2010-06-21 12:00 ` Avi Kivity
@ 2010-06-22  2:08   ` Zhang, Yanmin
  2010-06-22  9:12     ` Avi Kivity
  0 siblings, 1 reply; 6+ messages in thread
From: Zhang, Yanmin @ 2010-06-22  2:08 UTC (permalink / raw)
  To: Avi Kivity
  Cc: LKML, kvm, Ingo Molnar, Fr??d??ric Weisbecker,
	Arnaldo Carvalho de Melo, Cyrill Gorcunov, Lin Ming, Sheng Yang,
	Marcelo Tosatti, oerg Roedel, Jes Sorensen, Gleb Natapov,
	Zachary Amsden, zhiteng.huang, tim.c.chen

On Mon, 2010-06-21 at 15:00 +0300, Avi Kivity wrote:
> On 06/21/2010 12:31 PM, Zhang, Yanmin wrote:
> > The 2nd patch is to change the definition of perf_event to facilitate
> > perf attr copy when a hypercall happens.
> >
> > Signed-off-by: Zhang Yanmin<yanmin_zhang@linux.intel.com>
> >
> > ---
> >
> > --- linux-2.6_tip0620/include/linux/perf_event.h	2010-06-21 15:19:52.821999849 +0800
> > +++ linux-2.6_tip0620perfkvm/include/linux/perf_event.h	2010-06-21 16:53:49.283999849 +0800
> > @@ -188,7 +188,10 @@ struct perf_event_attr {
> >   	__u64			sample_type;
> >   	__u64			read_format;
> >
> >    
> 
> Assuming these flags are available to the guest?
These flags are used by generic perf codes. To match with host kernel, we wish
guest os also use the flags.

> 
> > -	__u64			disabled       :  1, /* off by default        */
> > +	union {
> > +		__u64		flags;
> > +		struct {
> > +			__u64	disabled       :  1, /* off by default        */
> >   				inherit	       :  1, /* children inherit it   */
> >    
> 
> inherit is meaningless for a guest.
Right. host kernel will reset it to 0 before create perf_event for guest os.
Here we couldn't delete the flag as it's used by perf generic codes.
I need separate the patch a little better. All definitions in include/linux/perf_event.h
are mostly perf generic code related. I'm very careful to change it.

> 
> >   				pinned	       :  1, /* must always be on PMU */
> >    
> 
> We cannot allow a guest to pin a counter.
Ok. I will reset it in function kvm_pv_perf_op_open.

> 
> The other flags are also problematic.  I'd like to see virt-specific 
> flags (probably we'll only need kernel/user and nested_hv for nested 
> virtualization).
How about to add more comments around struct guest_perf_attr->flags that
guest os developers should take a look at include/linux/perf_event.h?
BTW, I will reset more flags to 0 in kvm_pv_perf_op_open.

> 
> Something that is worrying is that we don't expose group information.  
> perf will multiplex the events for us, but there will be a loss in accuracy.
> 
> >   #ifdef CONFIG_HAVE_HW_BREAKPOINT
> >   #include<asm/hw_breakpoint.h>
> >   #endif
> > @@ -753,6 +752,20 @@ struct perf_event {
> >
> >   	perf_overflow_handler_t		overflow_handler;
> >
> > +	/*
> > +	 * pointers used by kvm perf paravirt interface.
> > +	 *
> > +	 * 1) Used in host kernel and points to host_perf_shadow which
> > +	 * has information about guest perf_event
> > +	 */
> > +	void				*host_perf_shadow;
> >    
> 
> Can we have real types instead of void pointers?
I just want perf generic codes have less dependency on KVM codes.

> 
> > +	/*
> > +	 * 2) Used in guest kernel and points to guest_perf_shadow which
> > +	 * is used as a communication area with host kernel. Host kernel
> > +	 * copies overflow data to it when an event overflows.
> > +	 */
> > +	void				*guest_perf_shadow;
> >    
> 
> It's strange to see both guest and host parts in the same patch.  
> Splitting to separate patches will really help review.
It's a little hard to split the patches if they change the same file. Perhaps
I could add more statements before the patch when I send it out.

> 
> > @@ -1626,9 +1629,22 @@ void perf_event_task_tick(struct task_st
> >   	if (ctx&&  ctx->nr_events&&  ctx->nr_events != ctx->nr_active)
> >   		rotate = 1;
> >
> > -	perf_ctx_adjust_freq(&cpuctx->ctx);
> > -	if (ctx)
> > -		perf_ctx_adjust_freq(ctx);
> > +#ifdef CONFIG_KVM_PERF
> > +	if (kvm_para_available()) {
> > +		/*
> > +		 * perf_ctx_adjust_freq causes lots of pmu->read which would
> > +		 * trigger too many vmexit to host kernel. We disable it
> > +		 * under para virt situation
> > +		 */
> > +		adjust_freq = 0;
> > +	}
> > +#endif
> >    
> 
> Perhaps we can have a batch read interface which will read many counters 
> at once.
It's a good idea. But that will touch many perf generic codes which causes it's hard
to maintain or follow future changes.

>   This would reduce the number of exits.  Also adjust the 
> frequency less frequently.
Here it depends on process scheduler frequency, CONFIG_HZ. 

> 
> > +
> > +	if (adjust_freq) {
> > +		perf_ctx_adjust_freq(&cpuctx->ctx);
> > +		if (ctx)
> > +			perf_ctx_adjust_freq(ctx);
> > +	}
> >
> >    
> 



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V2 2/5] ara virt interface of perf to support kvm guest os statistics collection in guest os
  2010-06-22  2:08   ` Zhang, Yanmin
@ 2010-06-22  9:12     ` Avi Kivity
  2010-06-22  9:25       ` Zhang, Yanmin
  0 siblings, 1 reply; 6+ messages in thread
From: Avi Kivity @ 2010-06-22  9:12 UTC (permalink / raw)
  To: Zhang, Yanmin
  Cc: LKML, kvm, Ingo Molnar, Fr??d??ric Weisbecker,
	Arnaldo Carvalho de Melo, Cyrill Gorcunov, Lin Ming, Sheng Yang,
	Marcelo Tosatti, oerg Roedel, Jes Sorensen, Gleb Natapov,
	Zachary Amsden, zhiteng.huang, tim.c.chen

On 06/22/2010 05:08 AM, Zhang, Yanmin wrote:
>
>> Something that is worrying is that we don't expose group information.
>> perf will multiplex the events for us, but there will be a loss in accuracy.
>>
>>      
>>>    #ifdef CONFIG_HAVE_HW_BREAKPOINT
>>>    #include<asm/hw_breakpoint.h>
>>>    #endif
>>> @@ -753,6 +752,20 @@ struct perf_event {
>>>
>>>    	perf_overflow_handler_t		overflow_handler;
>>>
>>> +	/*
>>> +	 * pointers used by kvm perf paravirt interface.
>>> +	 *
>>> +	 * 1) Used in host kernel and points to host_perf_shadow which
>>> +	 * has information about guest perf_event
>>> +	 */
>>> +	void				*host_perf_shadow;
>>>
>>>        
>> Can we have real types instead of void pointers?
>>      
> I just want perf generic codes have less dependency on KVM codes.
>    

One way to do that and retain type safety is to have

     struct perf_client {
           struct perf_client_ops *ops;
           ...
     }

The client (kvm) can do

    struct kvm_perf_client {
          struct perf_client pc;
          // kvm specific stuff
    };

the callbacks receive struct perf_client and use container_of to reach 
the kvm_perf_client that contains it.

>>> +	/*
>>> +	 * 2) Used in guest kernel and points to guest_perf_shadow which
>>> +	 * is used as a communication area with host kernel. Host kernel
>>> +	 * copies overflow data to it when an event overflows.
>>> +	 */
>>> +	void				*guest_perf_shadow;
>>>
>>>        
>> It's strange to see both guest and host parts in the same patch.
>> Splitting to separate patches will really help review.
>>      
> It's a little hard to split the patches if they change the same file. Perhaps
> I could add more statements before the patch when I send it out.
>    

With git, it's easy (once you're used to it):

   # go back one commit:
   git reset HEAD^
   # selectively add bits:
   git add -p
   # commit first patch
   git commit -s
   # selectively add bits:
   git add -p
   # commit second patch
   git commit -s


>>> @@ -1626,9 +1629,22 @@ void perf_event_task_tick(struct task_st
>>>    	if (ctx&&   ctx->nr_events&&   ctx->nr_events != ctx->nr_active)
>>>    		rotate = 1;
>>>
>>> -	perf_ctx_adjust_freq(&cpuctx->ctx);
>>> -	if (ctx)
>>> -		perf_ctx_adjust_freq(ctx);
>>> +#ifdef CONFIG_KVM_PERF
>>> +	if (kvm_para_available()) {
>>> +		/*
>>> +		 * perf_ctx_adjust_freq causes lots of pmu->read which would
>>> +		 * trigger too many vmexit to host kernel. We disable it
>>> +		 * under para virt situation
>>> +		 */
>>> +		adjust_freq = 0;
>>> +	}
>>> +#endif
>>>
>>>        
>> Perhaps we can have a batch read interface which will read many counters
>> at once.
>>      
> It's a good idea. But that will touch many perf generic codes which causes it's hard
> to maintain or follow future changes.
>    

I'm talking about the guest/host interface.  So you have one vmexit and 
many host perf calls.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V2 2/5] ara virt interface of perf to support kvm guest os statistics collection in guest os
  2010-06-22  9:12     ` Avi Kivity
@ 2010-06-22  9:25       ` Zhang, Yanmin
  2010-06-22  9:41         ` Avi Kivity
  0 siblings, 1 reply; 6+ messages in thread
From: Zhang, Yanmin @ 2010-06-22  9:25 UTC (permalink / raw)
  To: Avi Kivity
  Cc: LKML, kvm, Ingo Molnar, Fr??d??ric Weisbecker,
	Arnaldo Carvalho de Melo, Cyrill Gorcunov, Lin Ming, Sheng Yang,
	Marcelo Tosatti, oerg Roedel, Jes Sorensen, Gleb Natapov,
	Zachary Amsden, zhiteng.huang, tim.c.chen, Peter Zijlstra

On Tue, 2010-06-22 at 12:12 +0300, Avi Kivity wrote:
> On 06/22/2010 05:08 AM, Zhang, Yanmin wrote:
> >
> >> Something that is worrying is that we don't expose group information.
> >> perf will multiplex the events for us, but there will be a loss in accuracy.
> >>
> >>      
> >>>    #ifdef CONFIG_HAVE_HW_BREAKPOINT
> >>>    #include<asm/hw_breakpoint.h>
> >>>    #endif
> >>> @@ -753,6 +752,20 @@ struct perf_event {
> >>>
> >>>    	perf_overflow_handler_t		overflow_handler;
> >>>
> >>> +	/*
> >>> +	 * pointers used by kvm perf paravirt interface.
> >>> +	 *
> >>> +	 * 1) Used in host kernel and points to host_perf_shadow which
> >>> +	 * has information about guest perf_event
> >>> +	 */
> >>> +	void				*host_perf_shadow;
> >>>
> >>>        
> >> Can we have real types instead of void pointers?
> >>      
> > I just want perf generic codes have less dependency on KVM codes.
> >    
> 
> One way to do that and retain type safety is to have
> 
>      struct perf_client {
>            struct perf_client_ops *ops;
>            ...
>      }
> 
> The client (kvm) can do
> 
>     struct kvm_perf_client {
>           struct perf_client pc;
>           // kvm specific stuff
>     };
> 
> the callbacks receive struct perf_client and use container_of to reach 
> the kvm_perf_client that contains it.
Let me double check it.

> 
> >>> +	/*
> >>> +	 * 2) Used in guest kernel and points to guest_perf_shadow which
> >>> +	 * is used as a communication area with host kernel. Host kernel
> >>> +	 * copies overflow data to it when an event overflows.
> >>> +	 */
> >>> +	void				*guest_perf_shadow;
> >>>
> >>>        
> >> It's strange to see both guest and host parts in the same patch.
> >> Splitting to separate patches will really help review.
> >>      
> > It's a little hard to split the patches if they change the same file. Perhaps
> > I could add more statements before the patch when I send it out.
> >    
> 
> With git, it's easy (once you're used to it):
> 
>    # go back one commit:
>    git reset HEAD^
>    # selectively add bits:
>    git add -p
>    # commit first patch
>    git commit -s
>    # selectively add bits:
>    git add -p
>    # commit second patch
>    git commit -s
Thanks for your teaching.

> 
> 
> >>> @@ -1626,9 +1629,22 @@ void perf_event_task_tick(struct task_st
> >>>    	if (ctx&&   ctx->nr_events&&   ctx->nr_events != ctx->nr_active)
> >>>    		rotate = 1;
> >>>
> >>> -	perf_ctx_adjust_freq(&cpuctx->ctx);
> >>> -	if (ctx)
> >>> -		perf_ctx_adjust_freq(ctx);
> >>> +#ifdef CONFIG_KVM_PERF
> >>> +	if (kvm_para_available()) {
> >>> +		/*
> >>> +		 * perf_ctx_adjust_freq causes lots of pmu->read which would
> >>> +		 * trigger too many vmexit to host kernel. We disable it
> >>> +		 * under para virt situation
> >>> +		 */
> >>> +		adjust_freq = 0;
> >>> +	}
> >>> +#endif
> >>>
> >>>        
> >> Perhaps we can have a batch read interface which will read many counters
> >> at once.
> >>      
> > It's a good idea. But that will touch many perf generic codes which causes it's hard
> > to maintain or follow future changes.
> >    
> 
> I'm talking about the guest/host interface.  So you have one vmexit and 
> many host perf calls.
I understood what you were speaking. I mean, perf generic codes operate perf_event
one by one. At low layer, we just know one perf_event before calling hypercall to
vmexit to host kernel.




^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH V2 2/5] ara virt interface of perf to support kvm guest os statistics collection in guest os
  2010-06-22  9:25       ` Zhang, Yanmin
@ 2010-06-22  9:41         ` Avi Kivity
  0 siblings, 0 replies; 6+ messages in thread
From: Avi Kivity @ 2010-06-22  9:41 UTC (permalink / raw)
  To: Zhang, Yanmin
  Cc: LKML, kvm, Ingo Molnar, Fr??d??ric Weisbecker,
	Arnaldo Carvalho de Melo, Cyrill Gorcunov, Lin Ming, Sheng Yang,
	Marcelo Tosatti, oerg Roedel, Jes Sorensen, Gleb Natapov,
	Zachary Amsden, zhiteng.huang, tim.c.chen, Peter Zijlstra

On 06/22/2010 12:25 PM, Zhang, Yanmin wrote:
>
>> I'm talking about the guest/host interface.  So you have one vmexit and
>> many host perf calls.
>>      
> I understood what you were speaking. I mean, perf generic codes operate perf_event
> one by one. At low layer, we just know one perf_event before calling hypercall to
> vmexit to host kernel.
>    

Ah.

We might fix that by having the perf ops work on guest memory, and add a 
commit that transfers them to the host.  But this is complicated and can 
be left for later.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-06-22  9:41 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-06-21  9:31 [PATCH V2 2/5] ara virt interface of perf to support kvm guest os statistics collection in guest os Zhang, Yanmin
2010-06-21 12:00 ` Avi Kivity
2010-06-22  2:08   ` Zhang, Yanmin
2010-06-22  9:12     ` Avi Kivity
2010-06-22  9:25       ` Zhang, Yanmin
2010-06-22  9:41         ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).