linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH V2 2/5] para virt interface of perf to support kvm guest os statistics collection in guest os
@ 2010-06-21  9:31 Zhang, Yanmin
  2010-06-21 12:00 ` Avi Kivity
  0 siblings, 1 reply; 6+ messages in thread
From: Zhang, Yanmin @ 2010-06-21  9:31 UTC (permalink / raw)
  To: LKML, kvm, Avi Kivity
  Cc: Ingo Molnar, Frédéric Weisbecker, Arnaldo Carvalho de Melo,
	Cyrill Gorcunov, Lin Ming, Sheng Yang, Marcelo Tosatti,
	Joerg Roedel, Jes Sorensen, Gleb Natapov, Zachary Amsden,
	zhiteng.huang, tim.c.chen

The 2nd patch is to change the definition of perf_event to facilitate
perf attr copy when a hypercall happens.

Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>

---

--- linux-2.6_tip0620/include/linux/perf_event.h	2010-06-21 15:19:52.821999849 +0800
+++ linux-2.6_tip0620perfkvm/include/linux/perf_event.h	2010-06-21 16:53:49.283999849 +0800
@@ -188,7 +188,10 @@ struct perf_event_attr {
 	__u64			sample_type;
 	__u64			read_format;
 
-	__u64			disabled       :  1, /* off by default        */
+	union {
+		__u64		flags;
+		struct {
+			__u64	disabled       :  1, /* off by default        */
 				inherit	       :  1, /* children inherit it   */
 				pinned	       :  1, /* must always be on PMU */
 				exclusive      :  1, /* only group on PMU     */
@@ -217,6 +220,8 @@ struct perf_event_attr {
 				mmap_data      :  1, /* non-exec mmap data    */
 
 				__reserved_1   : 46;
+		};
+	};
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -465,12 +470,6 @@ enum perf_callchain_context {
 # include <asm/local64.h>
 #endif
 
-struct perf_guest_info_callbacks {
-	int (*is_in_guest) (void);
-	int (*is_user_mode) (void);
-	unsigned long (*get_guest_ip) (void);
-};
-
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #include <asm/hw_breakpoint.h>
 #endif
@@ -753,6 +752,20 @@ struct perf_event {
 
 	perf_overflow_handler_t		overflow_handler;
 
+	/*
+	 * pointers used by kvm perf paravirt interface.
+	 *
+	 * 1) Used in host kernel and points to host_perf_shadow which
+	 * has information about guest perf_event
+	 */
+	void				*host_perf_shadow;
+	/*
+	 * 2) Used in guest kernel and points to guest_perf_shadow which
+	 * is used as a communication area with host kernel. Host kernel
+	 * copies overflow data to it when an event overflows.
+	 */
+	void				*guest_perf_shadow;
+
 #ifdef CONFIG_EVENT_TRACING
 	struct ftrace_event_call	*tp_event;
 	struct event_filter		*filter;
@@ -838,6 +851,16 @@ struct perf_output_handle {
 	int				sample;
 };
 
+struct perf_guest_info_callbacks {
+	/* Support collect guest statistics from host side */
+	int (*is_in_guest) (void);
+	int (*is_user_mode) (void);
+	unsigned long (*get_guest_ip) (void);
+
+	/* Support paravirt interface */
+	void (*copy_event_to_shadow) (struct perf_event *event, int overflows);
+};
+
 #ifdef CONFIG_PERF_EVENTS
 
 /*
@@ -871,6 +894,10 @@ perf_event_create_kernel_counter(struct 
 				perf_overflow_handler_t callback);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
+extern void perf_event_output(struct perf_event *event, int nmi,
+		struct perf_sample_data *data, struct pt_regs *regs);
+void perf_event_attach(struct perf_event *event);
+void perf_event_detach(struct perf_event *event);
 
 struct perf_sample_data {
 	u64				type;
@@ -1023,6 +1050,14 @@ perf_event_task_sched_in(struct task_str
 static inline void
 perf_event_task_sched_out(struct task_struct *task,
 			    struct task_struct *next)			{ }
+
+static inline void
+perf_event_output(struct perf_event *event, int nmi,
+		struct perf_sample_data *data, struct pt_regs *regs)	{ }
+
+static inline void perf_event_attach(struct perf_event *event)		{ }
+static inline void perf_event_detach(struct perf_event *event)		{ }
+
 static inline void
 perf_event_task_tick(struct task_struct *task)				{ }
 static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
--- linux-2.6_tip0620/kernel/watchdog.c	2010-06-21 15:20:48.517999849 +0800
+++ linux-2.6_tip0620perfkvm/kernel/watchdog.c	2010-06-21 15:21:39.315999849 +0800
@@ -197,8 +197,6 @@ static struct perf_event_attr wd_hw_attr
 	.type		= PERF_TYPE_HARDWARE,
 	.config		= PERF_COUNT_HW_CPU_CYCLES,
 	.size		= sizeof(struct perf_event_attr),
-	.pinned		= 1,
-	.disabled	= 1,
 };
 
 /* Callback function for perf event subsystem */
@@ -361,6 +359,8 @@ static int watchdog_nmi_enable(int cpu)
 	/* Try to register using hardware perf events */
 	wd_attr = &wd_hw_attr;
 	wd_attr->sample_period = hw_nmi_get_sample_period();
+	wd_attr->pinned = 1;
+	wd_attr->disabled = 1;
 	event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback);
 	if (!IS_ERR(event)) {
 		printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
--- linux-2.6_tip0620/kernel/perf_event.c	2010-06-21 15:20:49.013999849 +0800
+++ linux-2.6_tip0620perfkvm/kernel/perf_event.c	2010-06-21 16:52:35.432999849 +0800
@@ -32,6 +32,7 @@
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/kvm_para.h>
 
 #include <asm/irq_regs.h>
 
@@ -747,6 +748,7 @@ static int group_can_go_on(struct perf_e
 	 */
 	if (event->attr.exclusive && cpuctx->active_oncpu)
 		return 0;
+
 	/*
 	 * Otherwise, try to add it if all previous groups were able
 	 * to go on.
@@ -1613,6 +1615,7 @@ void perf_event_task_tick(struct task_st
 	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
 	int rotate = 0;
+	int adjust_freq = 1;
 
 	if (!atomic_read(&nr_events))
 		return;
@@ -1626,9 +1629,22 @@ void perf_event_task_tick(struct task_st
 	if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active)
 		rotate = 1;
 
-	perf_ctx_adjust_freq(&cpuctx->ctx);
-	if (ctx)
-		perf_ctx_adjust_freq(ctx);
+#ifdef CONFIG_KVM_PERF
+	if (kvm_para_available()) {
+		/*
+		 * perf_ctx_adjust_freq causes lots of pmu->read which would
+		 * trigger too many vmexit to host kernel. We disable it
+		 * under para virt situation
+		 */
+		adjust_freq = 0;
+	}
+#endif
+
+	if (adjust_freq) {
+		perf_ctx_adjust_freq(&cpuctx->ctx);
+		if (ctx)
+			perf_ctx_adjust_freq(ctx);
+	}
 
 	if (!rotate)
 		return;
@@ -3434,7 +3450,7 @@ void perf_prepare_sample(struct perf_eve
 	}
 }
 
-static void perf_event_output(struct perf_event *event, int nmi,
+void perf_event_output(struct perf_event *event, int nmi,
 				struct perf_sample_data *data,
 				struct pt_regs *regs)
 {
@@ -5261,6 +5277,47 @@ perf_event_create_kernel_counter(struct 
 }
 EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
 
+void perf_event_attach(struct perf_event *event)
+{
+	struct perf_event_context *old_ctx, *new_ctx;
+
+	old_ctx = event->ctx;
+	new_ctx = find_get_context(current->pid, -1);
+	if (old_ctx != new_ctx) {
+		if (old_ctx) {
+			/* Delete from old ctx before joining new ctx */
+			mutex_lock(&old_ctx->mutex);
+			raw_spin_lock(&old_ctx->lock);
+			list_del_event(event, old_ctx);
+			raw_spin_unlock(&old_ctx->lock);
+			mutex_unlock(&old_ctx->mutex);
+			put_ctx(old_ctx);
+		}
+
+		mutex_lock(&new_ctx->mutex);
+		raw_spin_lock(&new_ctx->lock);
+		list_add_event(event, new_ctx);
+		event->ctx = new_ctx;
+		raw_spin_unlock(&new_ctx->lock);
+		mutex_unlock(&new_ctx->mutex);
+	} else
+		put_ctx(new_ctx);
+
+	perf_event_enable(event);
+}
+EXPORT_SYMBOL_GPL(perf_event_attach);
+
+void perf_event_detach(struct perf_event *event)
+{
+	/*
+	 * Just disable the event and don't del it from
+	 * ctx->event_list in case there is a race condition
+	 * with perf_event_read_value
+	 */
+	perf_event_disable(event);
+}
+EXPORT_SYMBOL_GPL(perf_event_detach);
+
 /*
  * inherit a event from parent task to child task:
  */



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-06-22  9:41 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-21  9:31 [PATCH V2 2/5] para virt interface of perf to support kvm guest os statistics collection in guest os Zhang, Yanmin
2010-06-21 12:00 ` Avi Kivity
2010-06-22  2:08   ` Zhang, Yanmin
2010-06-22  9:12     ` Avi Kivity
2010-06-22  9:25       ` Zhang, Yanmin
2010-06-22  9:41         ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).