linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] powerpc: Instrument Hypervisor Calls
@ 2006-08-14 23:21 Mike Kravetz
  2006-08-14 23:35 ` Geoff Levand
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Mike Kravetz @ 2006-08-14 23:21 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev

Here is an updated version of the patch to instrument hypervisor
calls.  In this version, all statistic updates are performed in
assembly code.  Statistics are made available via debugfs.
Instrumentation is enabled via a config option and there is zero
cost if not enabled.

-- 
Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>

diff -Naupr powerpc2/arch/powerpc/Kconfig.debug powerpc2.work/arch/powerpc/Kconfig.debug
--- powerpc2/arch/powerpc/Kconfig.debug	2006-08-14 18:00:45.000000000 +0000
+++ powerpc2.work/arch/powerpc/Kconfig.debug	2006-08-14 21:31:45.000000000 +0000
@@ -18,6 +18,20 @@ config DEBUG_STACK_USAGE
 
 	  This option will slow down process creation somewhat.
 
+config HCALL_STATS
+	bool "Hypervisor call instrumentation"
+	depends on PPC_PSERIES && DEBUG_FS
+	help
+	  Adds code to keep track of the number of hypervisor calls made and
+	  the amount of time spent in hypervisor calls: both wall time (based
+	  on time base) and cpu time (based on PURR).  A directory named
+	  hcall_inst is added at the root of the debugfs filesystem.  Within
+	  the hcall_inst directory are files that contain CPU specific call
+	  statistics.
+
+	  This option will add a small amount of overhead to all hypervisor
+	  calls.
+
 config DEBUGGER
 	bool "Enable debugger hooks"
 	depends on DEBUG_KERNEL
diff -Naupr powerpc2/arch/powerpc/kernel/asm-offsets.c powerpc2.work/arch/powerpc/kernel/asm-offsets.c
--- powerpc2/arch/powerpc/kernel/asm-offsets.c	2006-08-14 18:00:46.000000000 +0000
+++ powerpc2.work/arch/powerpc/kernel/asm-offsets.c	2006-08-14 22:22:49.000000000 +0000
@@ -136,6 +136,7 @@ int main(void)
 	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
 	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
 	DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
+	DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
 
 	DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
 	DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
@@ -160,6 +161,12 @@ int main(void)
 	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
 	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
 	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
+
+	/* hcall statistics */
+	DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats));
+	DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls));
+	DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total));
+	DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total));
 #endif /* CONFIG_PPC64 */
 	DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
 	DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
diff -Naupr powerpc2/arch/powerpc/platforms/pseries/Makefile powerpc2.work/arch/powerpc/platforms/pseries/Makefile
--- powerpc2/arch/powerpc/platforms/pseries/Makefile	2006-08-14 18:00:55.000000000 +0000
+++ powerpc2.work/arch/powerpc/platforms/pseries/Makefile	2006-08-14 21:31:45.000000000 +0000
@@ -12,3 +12,4 @@ obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o e
 
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
+obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
diff -Naupr powerpc2/arch/powerpc/platforms/pseries/hvCall.S powerpc2.work/arch/powerpc/platforms/pseries/hvCall.S
--- powerpc2/arch/powerpc/platforms/pseries/hvCall.S	2006-08-14 18:00:55.000000000 +0000
+++ powerpc2.work/arch/powerpc/platforms/pseries/hvCall.S	2006-08-14 22:17:04.000000000 +0000
@@ -10,9 +10,66 @@
 #include <asm/hvcall.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
 	
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
+#ifdef CONFIG_HCALL_STATS
+/*
+ * precall must preserve all registers.  use unused STK_PARM()
+ * areas to save snapshots and opcode.
+ */
+#define HCALL_INST_PRECALL					\
+	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
+	mftb	r3;			/* get timebase and */	\
+	std     r3,STK_PARM(r5)(r1);	/* save for later */	\
+BEGIN_FTR_SECTION;						\
+	mfspr	r3,SPRN_PURR;		/* get PURR and */	\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+	std	r3,STK_PARM(r6)(r1);	/* save for later */	\
+	ld	r3,STK_PARM(r3)(r1);	/* opcode back in r3 */
+	
+/*
+ * postcall is performed immediately before function return which
+ * allows liberal use of volital registers.
+ */
+#define HCALL_INST_POSTCALL					\
+	/* get time and PURR snapshots after hcall */		\
+	mftb	r7;			/* timebase after */	\
+BEGIN_FTR_SECTION;						\
+	mfspr	r8,SPRN_PURR;		/* PURR after */	\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+								\
+	/* calculate time and PURR deltas for call */		\
+	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
+	subf	r5,r5,r7;					\
+	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
+	subf	r6,r6,r8;					\
+								\
+	/* calculate address of stat structure */		\
+	ld	r4,STK_PARM(r3)(r1);	/* use opcode as */	\
+	rldicl	r4,r4,62,2;		/* index into array */	\
+	mulli	r4,r4,HCALL_STAT_SIZE;				\
+	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
+	add	r4,r4,r7;					\
+	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
+	add	r4,r4,r7;					\
+								\
+	/* update stats	*/					\
+	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
+	addi	r7,r7,1;					\
+	std	r7,HCALL_STAT_CALLS(r4);			\
+	ld      r7,HCALL_STAT_TB(r4);	/* timebase */		\
+	add	r7,r7,r5;					\
+	std	r7,HCALL_STAT_TB(r4);				\
+	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
+	add	r7,r7,r6;					\
+	std	r7,HCALL_STAT_PURR(r4);
+#else
+#define HCALL_INST_PRECALL
+#define HCALL_INST_POSTCALL
+#endif
+
 	.text
 
 _GLOBAL(plpar_hcall_norets)
@@ -21,8 +78,12 @@ _GLOBAL(plpar_hcall_norets)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	HVSC				/* invoke the hypervisor */
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
@@ -33,6 +94,8 @@ _GLOBAL(plpar_hcall)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
 	mr	r4,r5
@@ -50,6 +113,8 @@ _GLOBAL(plpar_hcall)
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
@@ -61,6 +126,8 @@ _GLOBAL(plpar_hcall9)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
 	mr	r4,r5
@@ -86,6 +153,8 @@ _GLOBAL(plpar_hcall9)
 	std	r11,56(r12)
 	std	r12,64(r12)
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
diff -Naupr powerpc2/arch/powerpc/platforms/pseries/hvCall_inst.c powerpc2.work/arch/powerpc/platforms/pseries/hvCall_inst.c
--- powerpc2/arch/powerpc/platforms/pseries/hvCall_inst.c	1970-01-01 00:00:00.000000000 +0000
+++ powerpc2.work/arch/powerpc/platforms/pseries/hvCall_inst.c	2006-08-14 22:31:31.000000000 +0000
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2006 Mike Kravetz IBM Corporation
+ *
+ * Hypervisor Call Instrumentation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+
+DEFINE_PER_CPU(struct hcall_stats[MAX_HCALL_OPCODES+1], hcall_stats);
+
+/*
+ * Routines for displaying the statistics in debugfs
+ */
+static void *hc_start(struct seq_file *m, loff_t *pos)
+{
+	if ((int)*pos < (MAX_HCALL_OPCODES + 1))
+		return (void *)(unsigned long)(*pos + 1);
+
+	return NULL;
+}
+
+static void *hc_next(struct seq_file *m, void *p, loff_t * pos)
+{
+	++*pos;
+
+	return hc_start(m, pos);
+}
+
+static void hc_stop(struct seq_file *m, void *p)
+{
+}
+
+static int hc_show(struct seq_file *m, void *p)
+{
+	unsigned long h_num = (unsigned long)p;
+	struct hcall_stats *hs = (struct hcall_stats *)m->private;
+
+	if (hs[h_num].num_calls)
+		seq_printf(m, "%lu %lu %lu %lu\n", h_num<<2,
+			   hs[h_num].num_calls,
+			   hs[h_num].tb_total,
+			   hs[h_num].purr_total);
+
+	return 0;
+}
+
+static struct seq_operations hcall_inst_seq_ops = {
+        .start = hc_start,
+        .next  = hc_next,
+        .stop  = hc_stop,
+        .show  = hc_show
+};
+
+static int hcall_inst_seq_open(struct inode *inode, struct file *file)
+{
+	int rc;
+	struct seq_file *seq;
+
+	rc = seq_open(file, &hcall_inst_seq_ops);
+	seq = file->private_data;
+	seq->private = file->f_dentry->d_inode->u.generic_ip;
+
+	return rc;
+}
+
+static struct file_operations hcall_inst_seq_fops = {
+	.open = hcall_inst_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#define	HCALL_ROOT_DIR		"hcall_inst"
+#define CPU_NAME_BUF_SIZE	32
+
+static int __init hcall_inst_init(void)
+{
+	struct dentry *hcall_root;
+	struct dentry *hcall_file;
+	char cpu_name_buf[CPU_NAME_BUF_SIZE];
+	int cpu;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
+	if (!hcall_root)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
+		hcall_file = debugfs_create_file(cpu_name_buf, S_IRUGO,
+						 hcall_root,
+						 per_cpu(hcall_stats, cpu),
+						 &hcall_inst_seq_fops);
+		if (!hcall_file)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+__initcall(hcall_inst_init);
diff -Naupr powerpc2/include/asm-powerpc/hvcall.h powerpc2.work/include/asm-powerpc/hvcall.h
--- powerpc2/include/asm-powerpc/hvcall.h	2006-08-14 18:05:16.000000000 +0000
+++ powerpc2.work/include/asm-powerpc/hvcall.h	2006-08-14 21:31:45.000000000 +0000
@@ -246,6 +246,15 @@ long plpar_hcall(unsigned long opcode, u
 #define PLPAR_HCALL9_BUFSIZE 9
 long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
 
+/* For hcall instrumentation.  One structure per-hcall, per-CPU */
+struct hcall_stats {
+	unsigned long	num_calls;	/* number of calls (on this CPU) */
+	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
+	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
+};
+void update_hcall_stats(unsigned long opcode, unsigned long tb_delta,
+			unsigned long purr_delta);
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc: Instrument Hypervisor Calls
  2006-08-14 23:21 Mike Kravetz
@ 2006-08-14 23:35 ` Geoff Levand
  2006-08-14 23:41   ` Mike Kravetz
  2006-08-15  1:03 ` Stephen Rothwell
  2006-08-15  3:32 ` Dave Boutcher
  2 siblings, 1 reply; 14+ messages in thread
From: Geoff Levand @ 2006-08-14 23:35 UTC (permalink / raw)
  To: Mike Kravetz; +Cc: linuxppc-dev, Paul Mackerras

Mike Kravetz wrote:

> +config HCALL_STATS
> +	bool "Hypervisor call instrumentation"
> +	depends on PPC_PSERIES && DEBUG_FS
> +	help
> +	  Adds code to keep track of the number of hypervisor calls made and
> +	  the amount of time spent in hypervisor calls: both wall time (based
> +	  on time base) and cpu time (based on PURR).  A directory named
> +	  hcall_inst is added at the root of the debugfs filesystem.

Could we keep this more generic and not mention platform specific
things like PURR?

-Geoff

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc: Instrument Hypervisor Calls
  2006-08-14 23:35 ` Geoff Levand
@ 2006-08-14 23:41   ` Mike Kravetz
  2006-08-15  2:01     ` Geoff Levand
  0 siblings, 1 reply; 14+ messages in thread
From: Mike Kravetz @ 2006-08-14 23:41 UTC (permalink / raw)
  To: Geoff Levand; +Cc: linuxppc-dev, Paul Mackerras

On Mon, Aug 14, 2006 at 04:35:12PM -0700, Geoff Levand wrote:
> Mike Kravetz wrote:
> > +config HCALL_STATS
> > +	bool "Hypervisor call instrumentation"
> > +	depends on PPC_PSERIES && DEBUG_FS
> > +	help
> > +	  Adds code to keep track of the number of hypervisor calls made and
> > +	  the amount of time spent in hypervisor calls: both wall time (based
> > +	  on time base) and cpu time (based on PURR).  A directory named
> > +	  hcall_inst is added at the root of the debugfs filesystem.
> 
> Could we keep this more generic and not mention platform specific
> things like PURR?

Not sure if I follow.  PURR will be used/displayed if available.  Time
based statistics will always be available.

I can change the description to make this clear.  Or, are you asking
that this not be mentioned at all?  Based on previous discussions, I
added the ability to display both wall and cpu time if available.

Thanks,
--
Mike

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc: Instrument Hypervisor Calls
  2006-08-14 23:21 Mike Kravetz
  2006-08-14 23:35 ` Geoff Levand
@ 2006-08-15  1:03 ` Stephen Rothwell
  2006-08-15  2:32   ` Mike Kravetz
  2006-08-15  3:32 ` Dave Boutcher
  2 siblings, 1 reply; 14+ messages in thread
From: Stephen Rothwell @ 2006-08-15  1:03 UTC (permalink / raw)
  To: Mike Kravetz; +Cc: linuxppc-dev, paulus

[-- Attachment #1: Type: text/plain, Size: 2081 bytes --]

On Mon, 14 Aug 2006 16:21:44 -0700 Mike Kravetz <kravetz@us.ibm.com> wrote:
>
> diff -Naupr powerpc2/arch/powerpc/platforms/pseries/hvCall.S powerpc2.work/arch/powerpc/platforms/pseries/hvCall.S
> --- powerpc2/arch/powerpc/platforms/pseries/hvCall.S	2006-08-14 18:00:55.000000000 +0000
> +++ powerpc2.work/arch/powerpc/platforms/pseries/hvCall.S	2006-08-14 22:17:04.000000000 +0000
> @@ -10,9 +10,66 @@
>  #include <asm/hvcall.h>
>  #include <asm/processor.h>
>  #include <asm/ppc_asm.h>
> +#include <asm/asm-offsets.h>
>  	
>  #define STK_PARM(i)     (48 + ((i)-3)*8)
>  
> +#ifdef CONFIG_HCALL_STATS
> +/*
> + * precall must preserve all registers.  use unused STK_PARM()
> + * areas to save snapshots and opcode.
> + */
> +#define HCALL_INST_PRECALL					\
> +	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
> +	mftb	r3;			/* get timebase and */	\
> +	std     r3,STK_PARM(r5)(r1);	/* save for later */	\
> +BEGIN_FTR_SECTION;						\
> +	mfspr	r3,SPRN_PURR;		/* get PURR and */	\
> +END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
> +	std	r3,STK_PARM(r6)(r1);	/* save for later */	\

So if we have no PURR, we use the TB value (this is fine)

> +	ld	r3,STK_PARM(r3)(r1);	/* opcode back in r3 */
> +	
> +/*
> + * postcall is performed immediately before function return which
> + * allows liberal use of volital registers.
> + */
> +#define HCALL_INST_POSTCALL					\
> +	/* get time and PURR snapshots after hcall */		\
> +	mftb	r7;			/* timebase after */	\
> +BEGIN_FTR_SECTION;						\
> +	mfspr	r8,SPRN_PURR;		/* PURR after */	\
> +END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
> +								\
> +	/* calculate time and PURR deltas for call */		\
> +	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
> +	subf	r5,r5,r7;					\
> +	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
> +	subf	r6,r6,r8;					\

But here, if we have no PURR we produce random numbers.  Maybe you should
reuse the TB value so the values reported may make some sense.

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc: Instrument Hypervisor Calls
  2006-08-14 23:41   ` Mike Kravetz
@ 2006-08-15  2:01     ` Geoff Levand
  2006-08-15  2:34       ` Mike Kravetz
  0 siblings, 1 reply; 14+ messages in thread
From: Geoff Levand @ 2006-08-15  2:01 UTC (permalink / raw)
  To: Mike Kravetz; +Cc: linuxppc-dev, Paul Mackerras

Mike Kravetz wrote:
> On Mon, Aug 14, 2006 at 04:35:12PM -0700, Geoff Levand wrote:
>> Mike Kravetz wrote:
>> > +config HCALL_STATS
>> > +	bool "Hypervisor call instrumentation"
>> > +	depends on PPC_PSERIES && DEBUG_FS
>> > +	help
>> > +	  Adds code to keep track of the number of hypervisor calls made
> and
>> > +	  the amount of time spent in hypervisor calls: both wall time
> (based
>> > +	  on time base) and cpu time (based on PURR).  A directory named
>> > +	  hcall_inst is added at the root of the debugfs filesystem.
>> 
>> Could we keep this more generic and not mention platform specific
>> things like PURR?
> 
> Not sure if I follow.  PURR will be used/displayed if available.  Time
> based statistics will always be available.
> 
> I can change the description to make this clear.  Or, are you asking
> that this not be mentioned at all?  Based on previous discussions, I
> added the ability to display both wall and cpu time if available.

I want to hook my instrumentation into this config option also, but my
platform dosn't have PURR, so I would like you to remove the mention
of '(based on PURR)' in the description of the option.  This way we
can have a generic 'Hypervisor call instrumentation' option for users,
on whatever platform they are using.

Does it make sense?

-Geoff

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc: Instrument Hypervisor Calls
  2006-08-15  1:03 ` Stephen Rothwell
@ 2006-08-15  2:32   ` Mike Kravetz
  2006-08-15  3:12     ` Stephen Rothwell
  0 siblings, 1 reply; 14+ messages in thread
From: Mike Kravetz @ 2006-08-15  2:32 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: linuxppc-dev, paulus

On Tue, Aug 15, 2006 at 11:03:19AM +1000, Stephen Rothwell wrote:
> On Mon, 14 Aug 2006 16:21:44 -0700 Mike Kravetz <kravetz@us.ibm.com> wrote:
> > +#define HCALL_INST_POSTCALL					\
> > +	/* get time and PURR snapshots after hcall */		\
> > +	mftb	r7;			/* timebase after */	\
> > +BEGIN_FTR_SECTION;						\
> > +	mfspr	r8,SPRN_PURR;		/* PURR after */	\
> > +END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
> > +								\
> > +	/* calculate time and PURR deltas for call */		\
> > +	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
> > +	subf	r5,r5,r7;					\
> > +	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
> > +	subf	r6,r6,r8;					\
> 
> But here, if we have no PURR we produce random numbers.  Maybe you should
> reuse the TB value so the values reported may make some sense.

Good catch!

My intention was to detect the presence of PURR in the display (debugfs)
code.  If there is no PURR, then no PURR values are displayed.  My thought
is that it doesn't matter what values are in the field if we don't display
them.  

Unfortunately, I left that bit of code out of the patch.

How does that sound?  Suppose I can also put all the 'calculate PURR delta'
code in the FTR_SECTION to save a few cycles.
-- 
Mike

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc: Instrument Hypervisor Calls
  2006-08-15  2:01     ` Geoff Levand
@ 2006-08-15  2:34       ` Mike Kravetz
  0 siblings, 0 replies; 14+ messages in thread
From: Mike Kravetz @ 2006-08-15  2:34 UTC (permalink / raw)
  To: Geoff Levand; +Cc: linuxppc-dev, Paul Mackerras

On Mon, Aug 14, 2006 at 07:01:39PM -0700, Geoff Levand wrote:
> I want to hook my instrumentation into this config option also, but my
> platform dosn't have PURR, so I would like you to remove the mention
> of '(based on PURR)' in the description of the option.  This way we
> can have a generic 'Hypervisor call instrumentation' option for users,
> on whatever platform they are using.
> 
> Does it make sense?

Makes sense.  No problem.  I'll include this update in a revised edition
of the patch.

Thanks,
-- 
Mike

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc: Instrument Hypervisor Calls
  2006-08-15  2:32   ` Mike Kravetz
@ 2006-08-15  3:12     ` Stephen Rothwell
  0 siblings, 0 replies; 14+ messages in thread
From: Stephen Rothwell @ 2006-08-15  3:12 UTC (permalink / raw)
  To: Mike Kravetz; +Cc: linuxppc-dev, paulus

[-- Attachment #1: Type: text/plain, Size: 692 bytes --]

On Mon, 14 Aug 2006 19:32:24 -0700 Mike Kravetz <kravetz@us.ibm.com> wrote:
>
> My intention was to detect the presence of PURR in the display (debugfs)
> code.  If there is no PURR, then no PURR values are displayed.  My thought
> is that it doesn't matter what values are in the field if we don't display
> them.  
> 
> Unfortunately, I left that bit of code out of the patch.
> 
> How does that sound?  Suppose I can also put all the 'calculate PURR delta'
> code in the FTR_SECTION to save a few cycles.

That sounds good (as long as you also suppress the display).

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] powerpc: Instrument Hypervisor Calls
  2006-08-14 23:21 Mike Kravetz
  2006-08-14 23:35 ` Geoff Levand
  2006-08-15  1:03 ` Stephen Rothwell
@ 2006-08-15  3:32 ` Dave Boutcher
  2 siblings, 0 replies; 14+ messages in thread
From: Dave Boutcher @ 2006-08-15  3:32 UTC (permalink / raw)
  To: Mike Kravetz; +Cc: linuxppc-dev, Paul Mackerras

On Mon, 14 Aug 2006 16:21:44 -0700, Mike Kravetz <kravetz@us.ibm.com> said:
> Here is an updated version of the patch to instrument hypervisor
> calls.  In this version, all statistic updates are performed in
> assembly code.  Statistics are made available via debugfs.
> Instrumentation is enabled via a config option and there is zero
> cost if not enabled.

...

> +#define HCALL_INST_POSTCALL					\
> +	/* get time and PURR snapshots after hcall */		\
> +	mftb	r7;			/* timebase after */	\

if you just add a "mr r8,r7" here you will get consistent
behaviour (either TB and PURR if PURR is supported, or TB
everywhere.)  The processor will pretty much optimize that
away if the FTR section is not no-op-ed.

> +BEGIN_FTR_SECTION;						\
> +	mfspr	r8,SPRN_PURR;		/* PURR after */	\
> +END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\

Dave B

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] powerpc: Instrument Hypervisor Calls
@ 2006-08-16 16:04 Mike Kravetz
  2006-08-24  6:55 ` Paul Mackerras
  0 siblings, 1 reply; 14+ messages in thread
From: Mike Kravetz @ 2006-08-16 16:04 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev

Add instrumentation for hypervisor calls on pseries.  Call statistics
include number of calls, wall time and cpu cycles (if available) and
are made available via debugfs.  Instrumentation code is behind the
HCALL_STATS config option and has no impact if not enabled.

-- 
Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>

diff -Naupr powerpc2/arch/powerpc/Kconfig.debug powerpc2.work/arch/powerpc/Kconfig.debug
--- powerpc2/arch/powerpc/Kconfig.debug	2006-08-14 18:00:45.000000000 +0000
+++ powerpc2.work/arch/powerpc/Kconfig.debug	2006-08-15 18:45:57.000000000 +0000
@@ -18,6 +18,20 @@ config DEBUG_STACK_USAGE
 
 	  This option will slow down process creation somewhat.
 
+config HCALL_STATS
+	bool "Hypervisor call instrumentation"
+	depends on PPC_PSERIES && DEBUG_FS
+	help
+	  Adds code to keep track of the number of hypervisor calls made and
+	  the amount of time spent in hypervisor callsr.  Wall time spent in
+	  each call is always calculated, and if available CPU cycles spent
+	  are also calculated.  A directory named hcall_inst is added at the
+	  root of the debugfs filesystem.  Within the hcall_inst directory
+	  are files that contain CPU specific call statistics.
+
+	  This option will add a small amount of overhead to all hypervisor
+	  calls.
+
 config DEBUGGER
 	bool "Enable debugger hooks"
 	depends on DEBUG_KERNEL
diff -Naupr powerpc2/arch/powerpc/kernel/asm-offsets.c powerpc2.work/arch/powerpc/kernel/asm-offsets.c
--- powerpc2/arch/powerpc/kernel/asm-offsets.c	2006-08-14 18:00:46.000000000 +0000
+++ powerpc2.work/arch/powerpc/kernel/asm-offsets.c	2006-08-14 22:22:49.000000000 +0000
@@ -136,6 +136,7 @@ int main(void)
 	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
 	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
 	DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
+	DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
 
 	DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
 	DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
@@ -160,6 +161,12 @@ int main(void)
 	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
 	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
 	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
+
+	/* hcall statistics */
+	DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats));
+	DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls));
+	DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total));
+	DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total));
 #endif /* CONFIG_PPC64 */
 	DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
 	DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
diff -Naupr powerpc2/arch/powerpc/platforms/pseries/Makefile powerpc2.work/arch/powerpc/platforms/pseries/Makefile
--- powerpc2/arch/powerpc/platforms/pseries/Makefile	2006-08-14 18:00:55.000000000 +0000
+++ powerpc2.work/arch/powerpc/platforms/pseries/Makefile	2006-08-14 21:31:45.000000000 +0000
@@ -12,3 +12,4 @@ obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o e
 
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
+obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
diff -Naupr powerpc2/arch/powerpc/platforms/pseries/hvCall.S powerpc2.work/arch/powerpc/platforms/pseries/hvCall.S
--- powerpc2/arch/powerpc/platforms/pseries/hvCall.S	2006-08-14 18:00:55.000000000 +0000
+++ powerpc2.work/arch/powerpc/platforms/pseries/hvCall.S	2006-08-15 19:06:10.000000000 +0000
@@ -10,9 +10,67 @@
 #include <asm/hvcall.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
 	
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
+#ifdef CONFIG_HCALL_STATS
+/*
+ * precall must preserve all registers.  use unused STK_PARM()
+ * areas to save snapshots and opcode.
+ */
+#define HCALL_INST_PRECALL					\
+	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
+	mftb	r3;			/* get timebase and */	\
+	std     r3,STK_PARM(r5)(r1);	/* save for later */	\
+BEGIN_FTR_SECTION;						\
+	mfspr	r3,SPRN_PURR;		/* get PURR and */	\
+	std	r3,STK_PARM(r6)(r1);	/* save for later */	\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+	ld	r3,STK_PARM(r3)(r1);	/* opcode back in r3 */
+	
+/*
+ * postcall is performed immediately before function return which
+ * allows liberal use of volital registers.
+ */
+#define HCALL_INST_POSTCALL					\
+	/* get time and PURR snapshots after hcall */		\
+	mftb	r7;			/* timebase after */	\
+BEGIN_FTR_SECTION;						\
+	mfspr	r8,SPRN_PURR;		/* PURR after */	\
+	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
+	subf	r6,r6,r8;		/* delta */		\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+								\
+	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
+	subf	r5,r5,r7;		/* time delta */	\
+								\
+	/* calculate address of stat structure */		\
+	ld	r4,STK_PARM(r3)(r1);	/* use opcode as */	\
+	rldicl	r4,r4,62,2;		/* index into array */	\
+	mulli	r4,r4,HCALL_STAT_SIZE;				\
+	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
+	add	r4,r4,r7;					\
+	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
+	add	r4,r4,r7;					\
+								\
+	/* update stats	*/					\
+	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
+	addi	r7,r7,1;					\
+	std	r7,HCALL_STAT_CALLS(r4);			\
+	ld      r7,HCALL_STAT_TB(r4);	/* timebase */		\
+	add	r7,r7,r5;					\
+	std	r7,HCALL_STAT_TB(r4);				\
+BEGIN_FTR_SECTION;						\
+	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
+	add	r7,r7,r6;					\
+	std	r7,HCALL_STAT_PURR(r4);				\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);
+#else
+#define HCALL_INST_PRECALL
+#define HCALL_INST_POSTCALL
+#endif
+
 	.text
 
 _GLOBAL(plpar_hcall_norets)
@@ -21,8 +79,12 @@ _GLOBAL(plpar_hcall_norets)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	HVSC				/* invoke the hypervisor */
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
@@ -33,6 +95,8 @@ _GLOBAL(plpar_hcall)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
 	mr	r4,r5
@@ -50,6 +114,8 @@ _GLOBAL(plpar_hcall)
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
@@ -61,6 +127,8 @@ _GLOBAL(plpar_hcall9)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
 	mr	r4,r5
@@ -86,6 +154,8 @@ _GLOBAL(plpar_hcall9)
 	std	r11,56(r12)
 	std	r12,64(r12)
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
diff -Naupr powerpc2/arch/powerpc/platforms/pseries/hvCall_inst.c powerpc2.work/arch/powerpc/platforms/pseries/hvCall_inst.c
--- powerpc2/arch/powerpc/platforms/pseries/hvCall_inst.c	1970-01-01 00:00:00.000000000 +0000
+++ powerpc2.work/arch/powerpc/platforms/pseries/hvCall_inst.c	2006-08-15 21:41:17.000000000 +0000
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2006 Mike Kravetz IBM Corporation
+ *
+ * Hypervisor Call Instrumentation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/cputable.h>
+
+DEFINE_PER_CPU(struct hcall_stats[MAX_HCALL_OPCODES+1], hcall_stats);
+
+/*
+ * Routines for displaying the statistics in debugfs
+ */
+static void *hc_start(struct seq_file *m, loff_t *pos)
+{
+	if ((int)*pos < (MAX_HCALL_OPCODES + 1))
+		return (void *)(unsigned long)(*pos + 1);
+
+	return NULL;
+}
+
+static void *hc_next(struct seq_file *m, void *p, loff_t * pos)
+{
+	++*pos;
+
+	return hc_start(m, pos);
+}
+
+static void hc_stop(struct seq_file *m, void *p)
+{
+}
+
+static int hc_show(struct seq_file *m, void *p)
+{
+	unsigned long h_num = (unsigned long)p;
+	struct hcall_stats *hs = (struct hcall_stats *)m->private;
+
+	if (hs[h_num].num_calls) {
+		if (cpu_has_feature(CPU_FTR_PURR))
+			seq_printf(m, "%lu %lu %lu %lu\n", h_num<<2,
+				   hs[h_num].num_calls,
+				   hs[h_num].tb_total,
+				   hs[h_num].purr_total);
+		else
+			seq_printf(m, "%lu %lu %lu\n", h_num<<2,
+				   hs[h_num].num_calls,
+				   hs[h_num].tb_total);
+	}
+
+	return 0;
+}
+
+static struct seq_operations hcall_inst_seq_ops = {
+        .start = hc_start,
+        .next  = hc_next,
+        .stop  = hc_stop,
+        .show  = hc_show
+};
+
+static int hcall_inst_seq_open(struct inode *inode, struct file *file)
+{
+	int rc;
+	struct seq_file *seq;
+
+	rc = seq_open(file, &hcall_inst_seq_ops);
+	seq = file->private_data;
+	seq->private = file->f_dentry->d_inode->u.generic_ip;
+
+	return rc;
+}
+
+static struct file_operations hcall_inst_seq_fops = {
+	.open = hcall_inst_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#define	HCALL_ROOT_DIR		"hcall_inst"
+#define CPU_NAME_BUF_SIZE	32
+
+static int __init hcall_inst_init(void)
+{
+	struct dentry *hcall_root;
+	struct dentry *hcall_file;
+	char cpu_name_buf[CPU_NAME_BUF_SIZE];
+	int cpu;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
+	if (!hcall_root)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
+		hcall_file = debugfs_create_file(cpu_name_buf, S_IRUGO,
+						 hcall_root,
+						 per_cpu(hcall_stats, cpu),
+						 &hcall_inst_seq_fops);
+		if (!hcall_file)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+__initcall(hcall_inst_init);
diff -Naupr powerpc2/include/asm-powerpc/hvcall.h powerpc2.work/include/asm-powerpc/hvcall.h
--- powerpc2/include/asm-powerpc/hvcall.h	2006-08-14 18:05:16.000000000 +0000
+++ powerpc2.work/include/asm-powerpc/hvcall.h	2006-08-14 21:31:45.000000000 +0000
@@ -246,6 +246,15 @@ long plpar_hcall(unsigned long opcode, u
 #define PLPAR_HCALL9_BUFSIZE 9
 long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
 
+/* For hcall instrumentation.  One structure per-hcall, per-CPU */
+struct hcall_stats {
+	unsigned long	num_calls;	/* number of calls (on this CPU) */
+	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
+	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
+};
+void update_hcall_stats(unsigned long opcode, unsigned long tb_delta,
+			unsigned long purr_delta);
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc: Instrument Hypervisor Calls
  2006-08-16 16:04 [PATCH] powerpc: Instrument Hypervisor Calls Mike Kravetz
@ 2006-08-24  6:55 ` Paul Mackerras
  2006-08-25 18:52   ` Mike Kravetz
  0 siblings, 1 reply; 14+ messages in thread
From: Paul Mackerras @ 2006-08-24  6:55 UTC (permalink / raw)
  To: Mike Kravetz; +Cc: linuxppc-dev

Mike Kravetz writes:

> Add instrumentation for hypervisor calls on pseries.  Call statistics
> include number of calls, wall time and cpu cycles (if available) and
> are made available via debugfs.  Instrumentation code is behind the
> HCALL_STATS config option and has no impact if not enabled.

Starting to look really good, just a few minor comments below...

> +/*
> + * precall must preserve all registers.  use unused STK_PARM()
> + * areas to save snapshots and opcode.
> + */
> +#define HCALL_INST_PRECALL					\
> +	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
> +	mftb	r3;			/* get timebase and */	\
> +	std     r3,STK_PARM(r5)(r1);	/* save for later */	\
> +BEGIN_FTR_SECTION;						\
> +	mfspr	r3,SPRN_PURR;		/* get PURR and */	\
> +	std	r3,STK_PARM(r6)(r1);	/* save for later */	\
> +END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
> +	ld	r3,STK_PARM(r3)(r1);	/* opcode back in r3 */

You can use r0 here; maybe you could use it instead of having to
restore r3 (I see that you still need to save r3 for later).

> + * postcall is performed immediately before function return which
> + * allows liberal use of volital registers.

"volatile"

> +	/* calculate address of stat structure */		\
> +	ld	r4,STK_PARM(r3)(r1);	/* use opcode as */	\
> +	rldicl	r4,r4,62,2;		/* index into array */	\
> +	mulli	r4,r4,HCALL_STAT_SIZE;				\

It's a pity our multiplies are slow (6 cycles).  The rldicl would I
think be more clearly expressed as srdi r4,r4,2.  We could use a shift
and add instead of the multiply if we put a big fat comment in the
header that defines the structure warning people to adjust the
assembly if they change the structure.  Might not be worth it though.

> +	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\

We could use a load from the TOC here rather than the 5 instructions
that LOAD_REG_ADDR turns into.  Look at some gcc -S output to see how
it's done.

BTW are we going to die horribly if someone uses an hcall greater than
MAX_HCALL_OPCODES?  The hcall functions are available to modules, so
it would be quite possible for a module to come along and try to use
some new hcalls that weren't known about when the kernel was built.

Paul.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc: Instrument Hypervisor Calls
  2006-08-24  6:55 ` Paul Mackerras
@ 2006-08-25 18:52   ` Mike Kravetz
  0 siblings, 0 replies; 14+ messages in thread
From: Mike Kravetz @ 2006-08-25 18:52 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev

On Thu, Aug 24, 2006 at 04:55:10PM +1000, Paul Mackerras wrote:
> > +	/* calculate address of stat structure */		\
> > +	ld	r4,STK_PARM(r3)(r1);	/* use opcode as */	\
> > +	rldicl	r4,r4,62,2;		/* index into array */	\
> > +	mulli	r4,r4,HCALL_STAT_SIZE;				\
> 
> It's a pity our multiplies are slow (6 cycles).  The rldicl would I
> think be more clearly expressed as srdi r4,r4,2.  We could use a shift
> and add instead of the multiply if we put a big fat comment in the
> header that defines the structure warning people to adjust the
> assembly if they change the structure.  Might not be worth it though.

I would rather keep the multiply and minimal safety it provides when
people change the structure.

> BTW are we going to die horribly if someone uses an hcall greater than
> MAX_HCALL_OPCODES?  The hcall functions are available to modules, so
> it would be quite possible for a module to come along and try to use
> some new hcalls that weren't known about when the kernel was built.

Yes, bad things would happen.  This is/was a bad assumption on my part
that all callers would pass in valid opcodes.  I'll put in a simple check
for that.

-- 
Mike

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] powerpc: Instrument Hypervisor Calls
@ 2006-09-06 23:23 Mike Kravetz
  2006-09-07  0:34 ` Paul Mackerras
  0 siblings, 1 reply; 14+ messages in thread
From: Mike Kravetz @ 2006-09-06 23:23 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev

Hi Paul,

This patch addresses the issues you brought up with the previous version.
The most important issue being range checking of opcodes so the code does
not write outside the array.  You also suggested doing a load from TOC
instead of LOAD_REG_ADDR, because LOAD_REG_ADDR turns into 5 instructions.
I 'think' LOAD_REG_ADDR only turns into 5 instructions on 32 bit systems.
It is a single load from TOC on 64 bits as we are using it.

Here it is:

Add instrumentation for hypervisor calls on pseries.  Call statistics
include number of calls, wall time and cpu cycles (if available) and
are made available via debugfs.  Instrumentation code is behind the
HCALL_STATS config option and has no impact if not enabled.

--
Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>

diff -Naupr powerpc/arch/powerpc/Kconfig.debug powerpc.work/arch/powerpc/Kconfig.debug
--- powerpc/arch/powerpc/Kconfig.debug	2006-09-06 22:40:53.000000000 +0000
+++ powerpc.work/arch/powerpc/Kconfig.debug	2006-09-06 23:41:40.000000000 +0000
@@ -18,6 +18,20 @@ config DEBUG_STACK_USAGE
 
 	  This option will slow down process creation somewhat.
 
+config HCALL_STATS
+	bool "Hypervisor call instrumentation"
+	depends on PPC_PSERIES && DEBUG_FS
+	help
+	  Adds code to keep track of the number of hypervisor calls made and
+	  the amount of time spent in hypervisor callsr.  Wall time spent in
+	  each call is always calculated, and if available CPU cycles spent
+	  are also calculated.  A directory named hcall_inst is added at the
+	  root of the debugfs filesystem.  Within the hcall_inst directory
+	  are files that contain CPU specific call statistics.
+
+	  This option will add a small amount of overhead to all hypervisor
+	  calls.
+
 config DEBUGGER
 	bool "Enable debugger hooks"
 	depends on DEBUG_KERNEL
diff -Naupr powerpc/arch/powerpc/kernel/asm-offsets.c powerpc.work/arch/powerpc/kernel/asm-offsets.c
--- powerpc/arch/powerpc/kernel/asm-offsets.c	2006-09-06 22:40:55.000000000 +0000
+++ powerpc.work/arch/powerpc/kernel/asm-offsets.c	2006-09-06 23:41:40.000000000 +0000
@@ -137,6 +137,7 @@ int main(void)
 	DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
 	DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
 	DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
+	DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
 
 	DEFINE(SLBSHADOW_STACKVSID,
 	       offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
@@ -165,6 +166,12 @@ int main(void)
 	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
 	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
 	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
+
+	/* hcall statistics */
+	DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats));
+	DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls));
+	DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total));
+	DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total));
 #endif /* CONFIG_PPC64 */
 	DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
 	DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
diff -Naupr powerpc/arch/powerpc/platforms/pseries/Makefile powerpc.work/arch/powerpc/platforms/pseries/Makefile
--- powerpc/arch/powerpc/platforms/pseries/Makefile	2006-09-06 22:41:08.000000000 +0000
+++ powerpc.work/arch/powerpc/platforms/pseries/Makefile	2006-09-06 23:41:40.000000000 +0000
@@ -12,3 +12,4 @@ obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o e
 
 obj-$(CONFIG_HVC_CONSOLE)	+= hvconsole.o
 obj-$(CONFIG_HVCS)		+= hvcserver.o
+obj-$(CONFIG_HCALL_STATS)	+= hvCall_inst.o
diff -Naupr powerpc/arch/powerpc/platforms/pseries/hvCall.S powerpc.work/arch/powerpc/platforms/pseries/hvCall.S
--- powerpc/arch/powerpc/platforms/pseries/hvCall.S	2006-09-06 22:41:08.000000000 +0000
+++ powerpc.work/arch/powerpc/platforms/pseries/hvCall.S	2006-09-06 23:41:40.000000000 +0000
@@ -10,9 +10,69 @@
 #include <asm/hvcall.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
 	
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
+#ifdef CONFIG_HCALL_STATS
+/*
+ * precall must preserve all registers.  use unused STK_PARM()
+ * areas to save snapshots and opcode.
+ */
+#define HCALL_INST_PRECALL					\
+	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
+	mftb	r0;			/* get timebase and */	\
+	std     r0,STK_PARM(r5)(r1);	/* save for later */	\
+BEGIN_FTR_SECTION;						\
+	mfspr	r0,SPRN_PURR;		/* get PURR and */	\
+	std	r0,STK_PARM(r6)(r1);	/* save for later */	\
+END_FTR_SECTION_IFCLR(CPU_FTR_PURR);
+	
+/*
+ * postcall is performed immediately before function return which
+ * allows liberal use of volatile registers.
+ */
+#define HCALL_INST_POSTCALL					\
+	ld	r4,STK_PARM(r3)(r1);	/* validate opcode */	\
+	cmpldi	cr7,r4,MAX_HCALL_OPCODE;			\
+	bgt-	cr7,1f;						\
+								\
+	/* get time and PURR snapshots after hcall */		\
+	mftb	r7;			/* timebase after */	\
+BEGIN_FTR_SECTION;						\
+	mfspr	r8,SPRN_PURR;		/* PURR after */	\
+	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
+	subf	r6,r6,r8;		/* delta */		\
+END_FTR_SECTION_IFCLR(CPU_FTR_PURR);				\
+	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
+	subf	r5,r5,r7;		/* time delta */	\
+								\
+	/* calculate address of stat structure r4 = opcode */	\
+	srdi	r4,r4,2;		/* index into array */	\
+	mulli	r4,r4,HCALL_STAT_SIZE;				\
+	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
+	add	r4,r4,r7;					\
+	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
+	add	r4,r4,r7;					\
+								\
+	/* update stats	*/					\
+	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
+	addi	r7,r7,1;					\
+	std	r7,HCALL_STAT_CALLS(r4);			\
+	ld      r7,HCALL_STAT_TB(r4);	/* timebase */		\
+	add	r7,r7,r5;					\
+	std	r7,HCALL_STAT_TB(r4);				\
+BEGIN_FTR_SECTION;						\
+	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
+	add	r7,r7,r6;					\
+	std	r7,HCALL_STAT_PURR(r4);				\
+END_FTR_SECTION_IFCLR(CPU_FTR_PURR);				\
+1:
+#else
+#define HCALL_INST_PRECALL
+#define HCALL_INST_POSTCALL
+#endif
+
 	.text
 
 _GLOBAL(plpar_hcall_norets)
@@ -21,8 +81,12 @@ _GLOBAL(plpar_hcall_norets)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	HVSC				/* invoke the hypervisor */
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
@@ -33,6 +97,8 @@ _GLOBAL(plpar_hcall)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
 	mr	r4,r5
@@ -50,6 +116,8 @@ _GLOBAL(plpar_hcall)
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
@@ -61,6 +129,8 @@ _GLOBAL(plpar_hcall9)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
 	mr	r4,r5
@@ -86,6 +156,8 @@ _GLOBAL(plpar_hcall9)
 	std	r11,56(r12)
 	std	r12,64(r12)
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
diff -Naupr powerpc/arch/powerpc/platforms/pseries/hvCall_inst.c powerpc.work/arch/powerpc/platforms/pseries/hvCall_inst.c
--- powerpc/arch/powerpc/platforms/pseries/hvCall_inst.c	1970-01-01 00:00:00.000000000 +0000
+++ powerpc.work/arch/powerpc/platforms/pseries/hvCall_inst.c	2006-09-06 23:42:55.000000000 +0000
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2006 Mike Kravetz IBM Corporation
+ *
+ * Hypervisor Call Instrumentation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/cputable.h>
+
+DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
+
+/*
+ * Routines for displaying the statistics in debugfs
+ */
+static void *hc_start(struct seq_file *m, loff_t *pos)
+{
+	if ((int)*pos < HCALL_STAT_ARRAY_SIZE)
+		return (void *)(unsigned long)(*pos + 1);
+
+	return NULL;
+}
+
+static void *hc_next(struct seq_file *m, void *p, loff_t * pos)
+{
+	++*pos;
+
+	return hc_start(m, pos);
+}
+
+static void hc_stop(struct seq_file *m, void *p)
+{
+}
+
+static int hc_show(struct seq_file *m, void *p)
+{
+	unsigned long h_num = (unsigned long)p;
+	struct hcall_stats *hs = (struct hcall_stats *)m->private;
+
+	if (hs[h_num].num_calls) {
+		if (!cpu_has_feature(CPU_FTR_PURR))
+			seq_printf(m, "%lu %lu %lu %lu\n", h_num<<2,
+				   hs[h_num].num_calls,
+				   hs[h_num].tb_total,
+				   hs[h_num].purr_total);
+		else
+			seq_printf(m, "%lu %lu %lu\n", h_num<<2,
+				   hs[h_num].num_calls,
+				   hs[h_num].tb_total);
+	}
+
+	return 0;
+}
+
+static struct seq_operations hcall_inst_seq_ops = {
+        .start = hc_start,
+        .next  = hc_next,
+        .stop  = hc_stop,
+        .show  = hc_show
+};
+
+static int hcall_inst_seq_open(struct inode *inode, struct file *file)
+{
+	int rc;
+	struct seq_file *seq;
+
+	rc = seq_open(file, &hcall_inst_seq_ops);
+	seq = file->private_data;
+	seq->private = file->f_dentry->d_inode->u.generic_ip;
+
+	return rc;
+}
+
+static struct file_operations hcall_inst_seq_fops = {
+	.open = hcall_inst_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+#define	HCALL_ROOT_DIR		"hcall_inst"
+#define CPU_NAME_BUF_SIZE	32
+
+static int __init hcall_inst_init(void)
+{
+	struct dentry *hcall_root;
+	struct dentry *hcall_file;
+	char cpu_name_buf[CPU_NAME_BUF_SIZE];
+	int cpu;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
+	if (!hcall_root)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
+		hcall_file = debugfs_create_file(cpu_name_buf, S_IRUGO,
+						 hcall_root,
+						 per_cpu(hcall_stats, cpu),
+						 &hcall_inst_seq_fops);
+		if (!hcall_file)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+__initcall(hcall_inst_init);
diff -Naupr powerpc/include/asm-powerpc/hvcall.h powerpc.work/include/asm-powerpc/hvcall.h
--- powerpc/include/asm-powerpc/hvcall.h	2006-09-06 22:45:48.000000000 +0000
+++ powerpc.work/include/asm-powerpc/hvcall.h	2006-09-06 23:42:16.000000000 +0000
@@ -208,7 +208,7 @@
 #define H_JOIN			0x298
 #define H_VASI_STATE            0x2A4
 #define H_ENABLE_CRQ		0x2B0
-#define MAX_HCALL_OPCODES	(H_ENABLE_CRQ >> 2)
+#define MAX_HCALL_OPCODE	H_ENABLE_CRQ
 
 #ifndef __ASSEMBLY__
 
@@ -246,6 +246,16 @@ long plpar_hcall(unsigned long opcode, u
 #define PLPAR_HCALL9_BUFSIZE 9
 long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
 
+/* For hcall instrumentation.  One structure per-hcall, per-CPU */
+struct hcall_stats {
+	unsigned long	num_calls;	/* number of calls (on this CPU) */
+	unsigned long	tb_total;	/* total wall time (mftb) of calls. */
+	unsigned long	purr_total;	/* total cpu time (PURR) of calls. */
+};
+void update_hcall_stats(unsigned long opcode, unsigned long tb_delta,
+			unsigned long purr_delta);
+#define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] powerpc: Instrument Hypervisor Calls
  2006-09-06 23:23 Mike Kravetz
@ 2006-09-07  0:34 ` Paul Mackerras
  0 siblings, 0 replies; 14+ messages in thread
From: Paul Mackerras @ 2006-09-07  0:34 UTC (permalink / raw)
  To: Mike Kravetz; +Cc: linuxppc-dev

Mike Kravetz writes:

> I 'think' LOAD_REG_ADDR only turns into 5 instructions on 32 bit systems.
> It is a single load from TOC on 64 bits as we are using it.

You're right, David (Gibson) cleaned that up some time ago.  In fact
it's only 2 instructions on 32-bit systems, and it is a TOC load on
64-bit systems.

Paul.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2006-09-07  0:34 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-08-16 16:04 [PATCH] powerpc: Instrument Hypervisor Calls Mike Kravetz
2006-08-24  6:55 ` Paul Mackerras
2006-08-25 18:52   ` Mike Kravetz
  -- strict thread matches above, loose matches on Subject: below --
2006-09-06 23:23 Mike Kravetz
2006-09-07  0:34 ` Paul Mackerras
2006-08-14 23:21 Mike Kravetz
2006-08-14 23:35 ` Geoff Levand
2006-08-14 23:41   ` Mike Kravetz
2006-08-15  2:01     ` Geoff Levand
2006-08-15  2:34       ` Mike Kravetz
2006-08-15  1:03 ` Stephen Rothwell
2006-08-15  2:32   ` Mike Kravetz
2006-08-15  3:12     ` Stephen Rothwell
2006-08-15  3:32 ` Dave Boutcher

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).