linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/6] powerpc: tracing: Add powerpc tracepoints for interrupt entry and exit
@ 2009-10-27  4:47 Anton Blanchard
  2009-10-27  4:49 ` [PATCH 2/6] powerpc: tracing: Add powerpc tracepoints for timer " Anton Blanchard
  2009-10-27 22:00 ` [PATCH 1/6] powerpc: tracing: Add powerpc tracepoints for interrupt entry and exit Steven Rostedt
  0 siblings, 2 replies; 9+ messages in thread
From: Anton Blanchard @ 2009-10-27  4:47 UTC (permalink / raw)
  To: Steven Rostedt, Frederic Weisbecker, Ingo Molnar, benh; +Cc: linuxppc-dev


This patch adds powerpc specific tracepoints for interrupt entry and exit.

While we already have generic irq_handler_entry and irq_handler_exit
tracepoints there are cases on our virtualised powerpc machines where an
interrupt is presented to the OS, but subsequently handled by the hypervisor.
This means no OS interrupt handler is invoked.

Here is an example on a POWER6 machine with the patch below applied:
 
<idle>-0     [006]  3243.949840744: irq_entry: pt_regs=c0000000ce31fb10
<idle>-0     [006]  3243.949850520: irq_exit: pt_regs=c0000000ce31fb10

<idle>-0     [007]  3243.950218208: irq_entry: pt_regs=c0000000ce323b10
<idle>-0     [007]  3243.950224080: irq_exit: pt_regs=c0000000ce323b10

<idle>-0     [000]  3244.021879320: irq_entry: pt_regs=c000000000a63aa0
<idle>-0     [000]  3244.021883616: irq_handler_entry: irq=87 handler=eth0
<idle>-0     [000]  3244.021887328: irq_handler_exit: irq=87 return=handled
<idle>-0     [000]  3244.021897408: irq_exit: pt_regs=c000000000a63aa0

Here we see two phantom interrupts (no handler was invoked), followed
by a real interrupt for eth0. Without the tracepoints in this patch we
would have missed the phantom interrupts.

Signed-off-by: Anton Blanchard <anton@samba.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
--

No change to this patch.

Index: linux.trees.git/arch/powerpc/include/asm/trace.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux.trees.git/arch/powerpc/include/asm/trace.h	2009-10-17 08:45:08.000000000 +1100
@@ -0,0 +1,53 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM powerpc
+
+#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWERPC_H
+
+#include <linux/tracepoint.h>
+
+struct pt_regs;
+
+TRACE_EVENT(irq_entry,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(irq_exit,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+#endif /* _TRACE_POWERPC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH asm
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
Index: linux.trees.git/arch/powerpc/kernel/irq.c
===================================================================
--- linux.trees.git.orig/arch/powerpc/kernel/irq.c	2009-10-17 08:44:32.000000000 +1100
+++ linux.trees.git/arch/powerpc/kernel/irq.c	2009-10-17 08:45:44.000000000 +1100
@@ -70,6 +70,8 @@
 #include <asm/firmware.h>
 #include <asm/lv1call.h>
 #endif
+#define CREATE_TRACE_POINTS
+#include <asm/trace.h>
 
 int __irq_offset_value;
 static int ppc_spurious_interrupts;
@@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	unsigned int irq;
 
+	trace_irq_entry(regs);
+
 	irq_enter();
 
 	check_stack_overflow();
@@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs)
 		timer_interrupt(regs);
 	}
 #endif
+
+	trace_irq_exit(regs);
 }
 
 void __init init_IRQ(void)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 2/6] powerpc: tracing: Add powerpc tracepoints for timer entry and exit
  2009-10-27  4:47 [PATCH 1/6] powerpc: tracing: Add powerpc tracepoints for interrupt entry and exit Anton Blanchard
@ 2009-10-27  4:49 ` Anton Blanchard
  2009-10-27  4:50   ` [PATCH 3/6] powerpc: tracing: Add hypervisor call tracepoints Anton Blanchard
  2009-10-27 22:00 ` [PATCH 1/6] powerpc: tracing: Add powerpc tracepoints for interrupt entry and exit Steven Rostedt
  1 sibling, 1 reply; 9+ messages in thread
From: Anton Blanchard @ 2009-10-27  4:49 UTC (permalink / raw)
  To: Steven Rostedt, Frederic Weisbecker, Ingo Molnar, benh; +Cc: linuxppc-dev


We can monitor the effectiveness of our power management of both the
kernel and hypervisor by probing the timer interrupt. For example, on
this box we see 10.37s timer interrupts on an idle core:

<idle>-0     [010]  3900.671297: timer_interrupt_entry: pt_regs=c0000000ce1e7b10
<idle>-0     [010]  3900.671302: timer_interrupt_exit: pt_regs=c0000000ce1e7b10

<idle>-0     [010]  3911.042963: timer_interrupt_entry: pt_regs=c0000000ce1e7b10
<idle>-0     [010]  3911.042968: timer_interrupt_exit: pt_regs=c0000000ce1e7b10

<idle>-0     [010]  3921.414630: timer_interrupt_entry: pt_regs=c0000000ce1e7b10
<idle>-0     [010]  3921.414635: timer_interrupt_exit: pt_regs=c0000000ce1e7b10

Since we have a 207MHz decrementer it will go negative and fire every 10.37s
even if Linux is completely idle.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux.trees.git/arch/powerpc/kernel/time.c
===================================================================
--- linux.trees.git.orig/arch/powerpc/kernel/time.c	2009-10-07 17:21:21.000000000 +1100
+++ linux.trees.git/arch/powerpc/kernel/time.c	2009-10-07 17:21:52.000000000 +1100
@@ -54,6 +54,7 @@
 #include <linux/irq.h>
 #include <linux/delay.h>
 #include <linux/perf_event.h>
+#include <asm/trace.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * re
 	struct clock_event_device *evt = &decrementer->event;
 	u64 now;
 
+	trace_timer_interrupt_entry(regs);
+
 	/* Ensure a positive value is written to the decrementer, or else
 	 * some CPUs will continuue to take decrementer exceptions */
 	set_dec(DECREMENTER_MAX);
@@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * re
 		now = decrementer->next_tb - now;
 		if (now <= DECREMENTER_MAX)
 			set_dec((int)now);
+		trace_timer_interrupt_exit(regs);
 		return;
 	}
 	old_regs = set_irq_regs(regs);
@@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * re
 
 	irq_exit();
 	set_irq_regs(old_regs);
+
+	trace_timer_interrupt_exit(regs);
 }
 
 void wakeup_decrementer(void)
Index: linux.trees.git/arch/powerpc/include/asm/trace.h
===================================================================
--- linux.trees.git.orig/arch/powerpc/include/asm/trace.h	2009-10-07 17:22:25.000000000 +1100
+++ linux.trees.git/arch/powerpc/include/asm/trace.h	2009-10-07 17:23:20.000000000 +1100
@@ -42,6 +42,40 @@ TRACE_EVENT(irq_exit,
 	TP_printk("pt_regs=%p", __entry->regs)
 );
 
+TRACE_EVENT(timer_interrupt_entry,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
+TRACE_EVENT(timer_interrupt_exit,
+
+	TP_PROTO(struct pt_regs *regs),
+
+	TP_ARGS(regs),
+
+	TP_STRUCT__entry(
+		__field(struct pt_regs *, regs)
+	),
+
+	TP_fast_assign(
+		__entry->regs = regs;
+	),
+
+	TP_printk("pt_regs=%p", __entry->regs)
+);
+
 #endif /* _TRACE_POWERPC_H */
 
 #undef TRACE_INCLUDE_PATH

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 3/6] powerpc: tracing: Add hypervisor call tracepoints
  2009-10-27  4:49 ` [PATCH 2/6] powerpc: tracing: Add powerpc tracepoints for timer " Anton Blanchard
@ 2009-10-27  4:50   ` Anton Blanchard
  2009-10-27  4:51     ` [PATCH 4/6] powerpc: tracing: Give hypervisor call tracepoints access to arguments Anton Blanchard
  0 siblings, 1 reply; 9+ messages in thread
From: Anton Blanchard @ 2009-10-27  4:50 UTC (permalink / raw)
  To: Steven Rostedt, Frederic Weisbecker, Ingo Molnar, benh; +Cc: linuxppc-dev


Add hcall_entry and hcall_exit tracepoints. This replaces the inline
assembly HCALL_STATS code and converts it to use the new tracepoints.

To keep the disabled case as quick as possible, we embed a status word
in the TOC so we can get at it with a single load. By doing so we
keep the overhead at a minimum. Time taken for a null hcall:

No tracepoint code:	135.79 cycles
Disabled tracepoints:	137.95 cycles

For reference, before this patch enabling HCALL_STATS resulted in a null
hcall of 201.44 cycles!

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux.trees.git/arch/powerpc/platforms/pseries/hvCall.S
===================================================================
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/hvCall.S	2009-10-27 13:36:05.000000000 +1100
+++ linux.trees.git/arch/powerpc/platforms/pseries/hvCall.S	2009-10-27 14:53:21.000000000 +1100
@@ -14,20 +14,54 @@
 	
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
-#ifdef CONFIG_HCALL_STATS
+#ifdef CONFIG_TRACEPOINTS
+
+	.section	".toc","aw"
+
+	.globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+	.llong	0
+
+	.section	".text"
+
 /*
  * precall must preserve all registers.  use unused STK_PARM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. We branch around this
+ * in early init (eg when populating the MMU hashtable) by using an
+ * unconditional cpu feature.
  */
 #define HCALL_INST_PRECALL					\
-	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
-	mftb	r0;			/* get timebase and */	\
-	std     r0,STK_PARM(r5)(r1);	/* save for later */	\
 BEGIN_FTR_SECTION;						\
-	mfspr	r0,SPRN_PURR;		/* get PURR and */	\
-	std	r0,STK_PARM(r6)(r1);	/* save for later */	\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);
-	
+	b	1f;						\
+END_FTR_SECTION(0, 1);						\
+	ld      r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	std	r3,STK_PARM(r3)(r1);				\
+	std	r4,STK_PARM(r4)(r1);				\
+	std	r5,STK_PARM(r5)(r1);				\
+	std	r6,STK_PARM(r6)(r1);				\
+	std	r7,STK_PARM(r7)(r1);				\
+	std	r8,STK_PARM(r8)(r1);				\
+	std	r9,STK_PARM(r9)(r1);				\
+	std	r10,STK_PARM(r10)(r1);				\
+	std	r0,16(r1);					\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_entry;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	ld	r4,STK_PARM(r4)(r1);				\
+	ld	r5,STK_PARM(r5)(r1);				\
+	ld	r6,STK_PARM(r6)(r1);				\
+	ld	r7,STK_PARM(r7)(r1);				\
+	ld	r8,STK_PARM(r8)(r1);				\
+	ld	r9,STK_PARM(r9)(r1);				\
+	ld	r10,STK_PARM(r10)(r1);				\
+	mtlr	r0;						\
+1:
+
 /*
  * postcall is performed immediately before function return which
  * allows liberal use of volatile registers.  We branch around this
@@ -38,40 +72,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR);
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
-	ld	r4,STK_PARM(r3)(r1);	/* validate opcode */	\
-	cmpldi	cr7,r4,MAX_HCALL_OPCODE;			\
-	bgt-	cr7,1f;						\
-								\
-	/* get time and PURR snapshots after hcall */		\
-	mftb	r7;			/* timebase after */	\
-BEGIN_FTR_SECTION;						\
-	mfspr	r8,SPRN_PURR;		/* PURR after */	\
-	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
-	subf	r6,r6,r8;		/* delta */		\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
-	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
-	subf	r5,r5,r7;		/* time delta */	\
-								\
-	/* calculate address of stat structure r4 = opcode */	\
-	srdi	r4,r4,2;		/* index into array */	\
-	mulli	r4,r4,HCALL_STAT_SIZE;				\
-	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
-	add	r4,r4,r7;					\
-	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
-	add	r4,r4,r7;					\
-								\
-	/* update stats	*/					\
-	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
-	addi	r7,r7,1;					\
-	std	r7,HCALL_STAT_CALLS(r4);			\
-	ld      r7,HCALL_STAT_TB(r4);	/* timebase */		\
-	add	r7,r7,r5;					\
-	std	r7,HCALL_STAT_TB(r4);				\
-BEGIN_FTR_SECTION;						\
-	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
-	add	r7,r7,r6;					\
-	std	r7,HCALL_STAT_PURR(r4);				\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+	ld      r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	ld	r6,STK_PARM(r3)(r1);				\
+	std	r3,STK_PARM(r3)(r1);				\
+	mr	r4,r3;						\
+	mr	r3,r6;						\
+	std	r0,16(r1);					\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_exit;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	mtlr	r0;						\
 1:
 #else
 #define HCALL_INST_PRECALL
Index: linux.trees.git/arch/powerpc/platforms/pseries/lpar.c
===================================================================
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/lpar.c	2009-10-27 13:36:05.000000000 +1100
+++ linux.trees.git/arch/powerpc/platforms/pseries/lpar.c	2009-10-27 14:53:21.000000000 +1100
@@ -39,6 +39,7 @@
 #include <asm/cputable.h>
 #include <asm/udbg.h>
 #include <asm/smp.h>
+#include <asm/trace.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -661,3 +662,34 @@ void arch_free_page(struct page *page, i
 EXPORT_SYMBOL(arch_free_page);
 
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * We optimise our hcall path by placing hcall_tracepoint_refcount
+ * directly in the TOC so we can check if the hcall tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long hcall_tracepoint_refcount;
+
+void hcall_tracepoint_regfunc(void)
+{
+	hcall_tracepoint_refcount++;
+}
+
+void hcall_tracepoint_unregfunc(void)
+{
+	hcall_tracepoint_refcount--;
+}
+
+void __trace_hcall_entry(unsigned long opcode)
+{
+	trace_hcall_entry(opcode);
+}
+
+void __trace_hcall_exit(long opcode, unsigned long retval)
+{
+	trace_hcall_exit(opcode, retval);
+}
+#endif
Index: linux.trees.git/arch/powerpc/include/asm/trace.h
===================================================================
--- linux.trees.git.orig/arch/powerpc/include/asm/trace.h	2009-10-27 13:36:30.000000000 +1100
+++ linux.trees.git/arch/powerpc/include/asm/trace.h	2009-10-27 14:56:38.000000000 +1100
@@ -76,6 +76,51 @@ TRACE_EVENT(timer_interrupt_exit,
 	TP_printk("pt_regs=%p", __entry->regs)
 );
 
+#ifdef CONFIG_PPC_PSERIES
+extern void hcall_tracepoint_regfunc(void);
+extern void hcall_tracepoint_unregfunc(void);
+
+TRACE_EVENT_FN(hcall_entry,
+
+	TP_PROTO(unsigned long opcode),
+
+	TP_ARGS(opcode),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+	),
+
+	TP_printk("opcode=%lu", __entry->opcode),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+
+TRACE_EVENT_FN(hcall_exit,
+
+	TP_PROTO(unsigned long opcode, unsigned long retval),
+
+	TP_ARGS(opcode, retval),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, opcode)
+		__field(unsigned long, retval)
+	),
+
+	TP_fast_assign(
+		__entry->opcode = opcode;
+		__entry->retval = retval;
+	),
+
+	TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
+
+	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
+);
+#endif
+
 #endif /* _TRACE_POWERPC_H */
 
 #undef TRACE_INCLUDE_PATH
Index: linux.trees.git/arch/powerpc/include/asm/hvcall.h
===================================================================
--- linux.trees.git.orig/arch/powerpc/include/asm/hvcall.h	2009-10-27 13:36:05.000000000 +1100
+++ linux.trees.git/arch/powerpc/include/asm/hvcall.h	2009-10-27 14:53:21.000000000 +1100
@@ -274,6 +274,8 @@ struct hcall_stats {
 	unsigned long	num_calls;	/* number of calls (on this CPU) */
 	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
 	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
+	unsigned long	tb_start;
+	unsigned long	purr_start;
 };
 #define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
 
Index: linux.trees.git/arch/powerpc/platforms/pseries/hvCall_inst.c
===================================================================
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/hvCall_inst.c	2009-10-27 13:36:05.000000000 +1100
+++ linux.trees.git/arch/powerpc/platforms/pseries/hvCall_inst.c	2009-10-27 14:53:21.000000000 +1100
@@ -26,6 +26,7 @@
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
 #include <asm/cputable.h>
+#include <asm/trace.h>
 
 DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
 
@@ -100,6 +101,34 @@ static const struct file_operations hcal
 #define	HCALL_ROOT_DIR		"hcall_inst"
 #define CPU_NAME_BUF_SIZE	32
 
+
+static void probe_hcall_entry(unsigned long opcode)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &get_cpu_var(hcall_stats)[opcode / 4];
+	h->tb_start = mftb();
+	h->purr_start = mfspr(SPRN_PURR);
+}
+
+static void probe_hcall_exit(unsigned long opcode, unsigned long retval)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &__get_cpu_var(hcall_stats)[opcode / 4];
+	h->num_calls++;
+	h->tb_total = mftb() - h->tb_start;
+	h->purr_total = mfspr(SPRN_PURR) - h->purr_start;
+
+	put_cpu_var(hcall_stats);
+}
+
 static int __init hcall_inst_init(void)
 {
 	struct dentry *hcall_root;
@@ -110,6 +139,14 @@ static int __init hcall_inst_init(void)
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		return 0;
 
+	if (register_trace_hcall_entry(probe_hcall_entry))
+		return -EINVAL;
+
+	if (register_trace_hcall_exit(probe_hcall_exit)) {
+		unregister_trace_hcall_entry(probe_hcall_entry);
+		return -EINVAL;
+	}
+
 	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
 	if (!hcall_root)
 		return -ENOMEM;
Index: linux.trees.git/arch/powerpc/Kconfig.debug
===================================================================
--- linux.trees.git.orig/arch/powerpc/Kconfig.debug	2009-10-27 13:36:05.000000000 +1100
+++ linux.trees.git/arch/powerpc/Kconfig.debug	2009-10-27 13:36:30.000000000 +1100
@@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE
 
 config HCALL_STATS
 	bool "Hypervisor call instrumentation"
-	depends on PPC_PSERIES && DEBUG_FS
+	depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS
 	help
 	  Adds code to keep track of the number of hypervisor calls made and
 	  the amount of time spent in hypervisor calls.  Wall time spent in

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 4/6] powerpc: tracing: Give hypervisor call tracepoints access to arguments
  2009-10-27  4:50   ` [PATCH 3/6] powerpc: tracing: Add hypervisor call tracepoints Anton Blanchard
@ 2009-10-27  4:51     ` Anton Blanchard
  2009-10-27  4:51       ` [PATCH 5/6] powerpc: Disable HCALL_STATS by default Anton Blanchard
  0 siblings, 1 reply; 9+ messages in thread
From: Anton Blanchard @ 2009-10-27  4:51 UTC (permalink / raw)
  To: Steven Rostedt, Frederic Weisbecker, Ingo Molnar, benh; +Cc: linuxppc-dev


While most users of the hcall tracepoints will only want the opcode and return
code, some will want all the arguments. To avoid the complexity of using
varargs we pass a pointer to the register save area which contain all
arguments.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux.trees.git/arch/powerpc/platforms/pseries/hvCall.S
===================================================================
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/hvCall.S	2009-10-27 14:29:09.000000000 +1100
+++ linux.trees.git/arch/powerpc/platforms/pseries/hvCall.S	2009-10-27 14:29:16.000000000 +1100
@@ -30,7 +30,7 @@ hcall_tracepoint_refcount:
  * in early init (eg when populating the MMU hashtable) by using an
  * unconditional cpu feature.
  */
-#define HCALL_INST_PRECALL					\
+#define HCALL_INST_PRECALL(FIRST_REG)				\
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
@@ -47,6 +47,7 @@ END_FTR_SECTION(0, 1);						\
 	std	r9,STK_PARM(r9)(r1);				\
 	std	r10,STK_PARM(r10)(r1);				\
 	std	r0,16(r1);					\
+	addi	r4,r1,STK_PARM(FIRST_REG);			\
 	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
 	bl	.__trace_hcall_entry;				\
 	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
@@ -68,7 +69,7 @@ END_FTR_SECTION(0, 1);						\
  * in early init (eg when populating the MMU hashtable) by using an
  * unconditional cpu feature.
  */
-#define HCALL_INST_POSTCALL					\
+#define __HCALL_INST_POSTCALL					\
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
@@ -88,9 +89,19 @@ END_FTR_SECTION(0, 1);						\
 	ld	r3,STK_PARM(r3)(r1);				\
 	mtlr	r0;						\
 1:
+
+#define HCALL_INST_POSTCALL_NORETS				\
+	li	r5,0;						\
+	__HCALL_INST_POSTCALL
+
+#define HCALL_INST_POSTCALL(BUFREG)				\
+	mr	r5,BUFREG;					\
+	__HCALL_INST_POSTCALL
+
 #else
-#define HCALL_INST_PRECALL
-#define HCALL_INST_POSTCALL
+#define HCALL_INST_PRECALL(FIRST_ARG)
+#define HCALL_INST_POSTCALL_NORETS
+#define HCALL_INST_POSTCALL(BUFREG)
 #endif
 
 	.text
@@ -101,11 +112,11 @@ _GLOBAL(plpar_hcall_norets)
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r4)
 
 	HVSC				/* invoke the hypervisor */
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL_NORETS
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
@@ -117,7 +128,7 @@ _GLOBAL(plpar_hcall)
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r5)
 
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
@@ -136,7 +147,7 @@ _GLOBAL(plpar_hcall)
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL(r12)
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
@@ -183,7 +194,7 @@ _GLOBAL(plpar_hcall9)
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r5)
 
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
@@ -211,7 +222,7 @@ _GLOBAL(plpar_hcall9)
 	std	r11,56(r12)
 	std	r0, 64(r12)
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL(r12)
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
Index: linux.trees.git/arch/powerpc/include/asm/trace.h
===================================================================
--- linux.trees.git.orig/arch/powerpc/include/asm/trace.h	2009-10-27 14:28:15.000000000 +1100
+++ linux.trees.git/arch/powerpc/include/asm/trace.h	2009-10-27 14:29:16.000000000 +1100
@@ -81,9 +81,9 @@ extern void hcall_tracepoint_unregfunc(v
 
 TRACE_EVENT_FN(hcall_entry,
 
-	TP_PROTO(unsigned long opcode),
+	TP_PROTO(unsigned long opcode, unsigned long *args),
 
-	TP_ARGS(opcode),
+	TP_ARGS(opcode, args),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, opcode)
@@ -100,9 +100,10 @@ TRACE_EVENT_FN(hcall_entry,
 
 TRACE_EVENT_FN(hcall_exit,
 
-	TP_PROTO(unsigned long opcode, unsigned long retval),
+	TP_PROTO(unsigned long opcode, unsigned long retval,
+		unsigned long *retbuf),
 
-	TP_ARGS(opcode, retval),
+	TP_ARGS(opcode, retval, retbuf),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, opcode)
Index: linux.trees.git/arch/powerpc/platforms/pseries/lpar.c
===================================================================
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/lpar.c	2009-10-27 14:28:16.000000000 +1100
+++ linux.trees.git/arch/powerpc/platforms/pseries/lpar.c	2009-10-27 14:29:16.000000000 +1100
@@ -683,13 +683,14 @@ void hcall_tracepoint_unregfunc(void)
 	hcall_tracepoint_refcount--;
 }
 
-void __trace_hcall_entry(unsigned long opcode)
+void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 {
-	trace_hcall_entry(opcode);
+	trace_hcall_entry(opcode, args);
 }
 
-void __trace_hcall_exit(long opcode, unsigned long retval)
+void __trace_hcall_exit(long opcode, unsigned long retval,
+			unsigned long *retbuf)
 {
-	trace_hcall_exit(opcode, retval);
+	trace_hcall_exit(opcode, retval, retbuf);
 }
 #endif
Index: linux.trees.git/arch/powerpc/platforms/pseries/hvCall_inst.c
===================================================================
--- linux.trees.git.orig/arch/powerpc/platforms/pseries/hvCall_inst.c	2009-10-27 14:28:16.000000000 +1100
+++ linux.trees.git/arch/powerpc/platforms/pseries/hvCall_inst.c	2009-10-27 14:29:16.000000000 +1100
@@ -102,7 +102,7 @@ static const struct file_operations hcal
 #define CPU_NAME_BUF_SIZE	32
 
 
-static void probe_hcall_entry(unsigned long opcode)
+static void probe_hcall_entry(unsigned long opcode, unsigned long *args)
 {
 	struct hcall_stats *h;
 
@@ -114,7 +114,8 @@ static void probe_hcall_entry(unsigned l
 	h->purr_start = mfspr(SPRN_PURR);
 }
 
-static void probe_hcall_exit(unsigned long opcode, unsigned long retval)
+static void probe_hcall_exit(unsigned long opcode, unsigned long retval,
+			     unsigned long *retbuf)
 {
 	struct hcall_stats *h;
 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 5/6] powerpc: Disable HCALL_STATS by default
  2009-10-27  4:51     ` [PATCH 4/6] powerpc: tracing: Give hypervisor call tracepoints access to arguments Anton Blanchard
@ 2009-10-27  4:51       ` Anton Blanchard
  2009-10-27  4:52         ` [PATCH 6/6] powerpc: Export powerpc_debugfs_root Anton Blanchard
  0 siblings, 1 reply; 9+ messages in thread
From: Anton Blanchard @ 2009-10-27  4:51 UTC (permalink / raw)
  To: benh; +Cc: linuxppc-dev


The overhead of HCALL_STATS is quite high and the functionality is very
rarely used. Key statistics are also missing (eg min/max).

With the new hcall tracepoints much more powerful tracing can be done in
a kernel module. Lets disable this by default.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux.trees.git/arch/powerpc/configs/pseries_defconfig
===================================================================
--- linux.trees.git.orig/arch/powerpc/configs/pseries_defconfig	2009-10-27 14:56:58.000000000 +1100
+++ linux.trees.git/arch/powerpc/configs/pseries_defconfig	2009-10-27 14:57:11.000000000 +1100
@@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_PAGEALLOC is not set
-CONFIG_HCALL_STATS=y
+# CONFIG_HCALL_STATS is not set
 # CONFIG_CODE_PATCHING_SELFTEST is not set
 # CONFIG_FTR_FIXUP_SELFTEST is not set
 # CONFIG_MSI_BITMAP_SELFTEST is not set

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 6/6] powerpc: Export powerpc_debugfs_root
  2009-10-27  4:51       ` [PATCH 5/6] powerpc: Disable HCALL_STATS by default Anton Blanchard
@ 2009-10-27  4:52         ` Anton Blanchard
  2009-10-27  5:02           ` hypervisor call trace module Anton Blanchard
  0 siblings, 1 reply; 9+ messages in thread
From: Anton Blanchard @ 2009-10-27  4:52 UTC (permalink / raw)
  To: benh; +Cc: linuxppc-dev


Kernel modules should be able to place their debug output inside our powerpc
debugfs directory.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux.trees.git/arch/powerpc/kernel/setup-common.c
===================================================================
--- linux.trees.git.orig/arch/powerpc/kernel/setup-common.c	2009-10-27 12:59:00.000000000 +1100
+++ linux.trees.git/arch/powerpc/kernel/setup-common.c	2009-10-27 12:59:15.000000000 +1100
@@ -660,6 +660,7 @@ late_initcall(check_cache_coherency);
 
 #ifdef CONFIG_DEBUG_FS
 struct dentry *powerpc_debugfs_root;
+EXPORT_SYMBOL(powerpc_debugfs_root);
 
 static int powerpc_debugfs_init(void)
 {

^ permalink raw reply	[flat|nested] 9+ messages in thread

* hypervisor call trace module
  2009-10-27  4:52         ` [PATCH 6/6] powerpc: Export powerpc_debugfs_root Anton Blanchard
@ 2009-10-27  5:02           ` Anton Blanchard
  0 siblings, 0 replies; 9+ messages in thread
From: Anton Blanchard @ 2009-10-27  5:02 UTC (permalink / raw)
  To: Steven Rostedt, Frederic Weisbecker, Ingo Molnar, benh; +Cc: linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 509 bytes --]


Here is an example of using the hcall tracepoints. This kernel
module provides strace like functionality for hypervisor hcalls:

-> 0x64(ff000002, 1, 2, d0000000034d7a71, f, c000000000a6f388, 1, c000000000989008, c000000000a3f480)
  <- 0x64()

Which was an EOI (opcode 0x64) of 0xff000002

There are a number of drivers that carry a lot of hcall related debug
code just in case we have to chase down a bug. I'm hoping hcall tracepoints
could replace it all and allow for much more powerful debugging.

Anton

[-- Attachment #2: Makefile --]
[-- Type: text/plain, Size: 238 bytes --]

obj-m := hcall_trace.o
KDIR := /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)
default:
	$(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules

clean:
	rm -rf *.mod.c *.ko *.o .*.cmd .tmp_versions Module.markers modules.order Module.symvers

[-- Attachment #3: hcall_trace.c --]
[-- Type: text/x-csrc, Size: 3597 bytes --]

/*
 * Hypervisor hcall trace
 *
 * Look for output in /sys/kernel/debug/powerpc/hcall_trace/
 * 
 * Copyright (C) 2009 Anton Blanchard <anton@au.ibm.com>, IBM
 *      
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */             
                        
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/relay.h>
#include <asm/trace.h>

#define SUBBUF_SIZE	131072
#define N_SUBBUFS	8

#define BUFLEN		512

static struct rchan *log_chan;

static void probe_hcall_entry(unsigned long opcode, unsigned long *args)
{
	char buf[BUFLEN];

	/* Don't log H_CEDE */
	if (opcode == H_CEDE)
		return;

	snprintf(buf, BUFLEN,
		"-> 0x%lx(%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx)\n",
		opcode, *args, *(args+1), *(args+2), *(args+3), *(args+4),
		*(args+5), *(args+6), *(args+7), *(args+8));

	relay_write(log_chan, buf, strlen(buf));
}

static void probe_hcall_exit(unsigned long opcode, unsigned long retval,
			     unsigned long *retbuf)
{
	char buf[BUFLEN];

	/* Don't log H_CEDE */
	if (opcode == H_CEDE)
		return;

	if (retbuf)
		snprintf(buf, BUFLEN, 
		    "  <- 0x%lx(%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx)\n",
			opcode, *retbuf, *(retbuf+1),
			*(retbuf+2), *(retbuf+3), *(retbuf+4), *(retbuf+5),
			*(retbuf+6), *(retbuf+7), *(retbuf+8));
	else
		sprintf(buf, "  <- 0x%lx()\n", opcode);

	relay_write(log_chan, buf, strlen(buf));
}

static struct dentry *create_buf_file_handler(const char *filename,
					      struct dentry *parent, int mode,
					      struct rchan_buf *buf,
					      int *is_global)
{
	return debugfs_create_file(filename, mode, parent, buf,
		&relay_file_operations);
}

static int remove_buf_file_handler(struct dentry *dentry)
{
	debugfs_remove(dentry);
	return 0;
}

static int subbuf_start(struct rchan_buf *buf, void *subbuf, void *prev_subbuf,
			size_t prev_padding)
{
	return 1;
}

static struct rchan_callbacks relay_callbacks =
{
	.create_buf_file = create_buf_file_handler,
	.remove_buf_file = remove_buf_file_handler,
	.subbuf_start = subbuf_start,
};

static struct dentry *debugfs_root;

static int __init hcall_trace_init(void)
{
	debugfs_root = debugfs_create_dir("hcall_trace", powerpc_debugfs_root);

	if (debugfs_root == ERR_PTR(-ENODEV)) {
		printk(KERN_ERR "Debugfs not configured\n");
		goto err_out;
	}

	if (!debugfs_root) {
		printk(KERN_ERR "Could not create debugfs directory\n");
		goto err_out;
	}

	log_chan = relay_open("cpu", debugfs_root, SUBBUF_SIZE,
			      N_SUBBUFS, &relay_callbacks, NULL);
	if (!log_chan) {
		printk(KERN_ERR "relay_open failed\n");
		goto err_relay_open;
	}

	if (register_trace_hcall_entry(probe_hcall_entry)) {
		printk(KERN_ERR "probe_hcall_entry probe point failed\n");
		goto err_probe_hcall_entry;
	}

	if (register_trace_hcall_exit(probe_hcall_exit)) {
		printk(KERN_ERR "probe_hcall_exit probe point failed\n");
		goto err_probe_hcall_exit;
	}

	return 0;

err_probe_hcall_exit:
	unregister_trace_hcall_entry(probe_hcall_entry);
err_probe_hcall_entry:
	relay_close(log_chan);
err_relay_open:
	debugfs_remove(debugfs_root);
err_out:
	return -ENODEV;
}

static void __exit hcall_trace_exit(void)
{
	unregister_trace_hcall_exit(probe_hcall_exit);
	unregister_trace_hcall_entry(probe_hcall_entry);

	relay_close(log_chan);
	debugfs_remove(debugfs_root);
}

module_init(hcall_trace_init)
module_exit(hcall_trace_exit)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Anton Blanchard");

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/6] powerpc: tracing: Add powerpc tracepoints for interrupt entry and exit
  2009-10-27  4:47 [PATCH 1/6] powerpc: tracing: Add powerpc tracepoints for interrupt entry and exit Anton Blanchard
  2009-10-27  4:49 ` [PATCH 2/6] powerpc: tracing: Add powerpc tracepoints for timer " Anton Blanchard
@ 2009-10-27 22:00 ` Steven Rostedt
  2009-10-27 23:41   ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 9+ messages in thread
From: Steven Rostedt @ 2009-10-27 22:00 UTC (permalink / raw)
  To: Anton Blanchard; +Cc: Frederic Weisbecker, Ingo Molnar, linuxppc-dev

I have no problem with any of these changes. I guess this is up to Ben
and company to decide the rest.

-- Steve

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/6] powerpc: tracing: Add powerpc tracepoints for interrupt entry and exit
  2009-10-27 22:00 ` [PATCH 1/6] powerpc: tracing: Add powerpc tracepoints for interrupt entry and exit Steven Rostedt
@ 2009-10-27 23:41   ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 9+ messages in thread
From: Benjamin Herrenschmidt @ 2009-10-27 23:41 UTC (permalink / raw)
  To: rostedt; +Cc: Frederic Weisbecker, Ingo Molnar, linuxppc-dev, Anton Blanchard

On Tue, 2009-10-27 at 18:00 -0400, Steven Rostedt wrote:
> I have no problem with any of these changes. I guess this is up to Ben
> and company to decide the rest.

I'll stick them into my -next branch, hopefully today or tomorrow.

The ppc "perf_events" specific bits are going to go via paulus first.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2009-10-27 23:41 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-10-27  4:47 [PATCH 1/6] powerpc: tracing: Add powerpc tracepoints for interrupt entry and exit Anton Blanchard
2009-10-27  4:49 ` [PATCH 2/6] powerpc: tracing: Add powerpc tracepoints for timer " Anton Blanchard
2009-10-27  4:50   ` [PATCH 3/6] powerpc: tracing: Add hypervisor call tracepoints Anton Blanchard
2009-10-27  4:51     ` [PATCH 4/6] powerpc: tracing: Give hypervisor call tracepoints access to arguments Anton Blanchard
2009-10-27  4:51       ` [PATCH 5/6] powerpc: Disable HCALL_STATS by default Anton Blanchard
2009-10-27  4:52         ` [PATCH 6/6] powerpc: Export powerpc_debugfs_root Anton Blanchard
2009-10-27  5:02           ` hypervisor call trace module Anton Blanchard
2009-10-27 22:00 ` [PATCH 1/6] powerpc: tracing: Add powerpc tracepoints for interrupt entry and exit Steven Rostedt
2009-10-27 23:41   ` Benjamin Herrenschmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).