linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [RFC] asm code for Hypervisor Call Instrumentation
@ 2006-08-02 17:59 Mike Kravetz
  2006-08-03  3:57 ` Stephen Rothwell
  2006-08-07  6:26 ` Paul Mackerras
  0 siblings, 2 replies; 8+ messages in thread
From: Mike Kravetz @ 2006-08-02 17:59 UTC (permalink / raw)
  To: linuxppc-dev

In my last submission of hcall instrumentation patches, I took
Paul's suggestion and added code to get the timebase and PURR
snapshots in the hcall asm routines.  I'm not confident in my
assembly skills, so I would really like some comments on this
approach/code.  The idea(and some code) was taken from hash_low_64.S.
Of course, this all 'appears' to work correctly. :)

This patch is built on top of Anton's hcall cleanup patch.  One
remaining issue is 'where should the statistic data structures
be updated?'.  For simplicity, I have the asm code call the
following C routine to perform the updates.

void update_hcall_stats(unsigned long opcode, unsigned long tb_delta,
                                unsigned long purr_delta)
{
        unsigned long op_index = opcode >> 2;
        struct hcall_stats *hs = &__get_cpu_var(hcall_stats[op_index]);

        hs->tb_total += tb_delta;
        hs->purr_total += purr_delta;
        hs->num_calls++;
}

I honestly do not know if it would be better to do all of this in the
assembly routine.  I believe that 'allocation' of a stack frame is
only necessary because of the callout.  Right?

-- 
Mike

diff -Naupr powerpc/arch/powerpc/platforms/pseries/hvCall.S powerpc.work/arch/powerpc/platforms/pseries/hvCall.S
--- powerpc/arch/powerpc/platforms/pseries/hvCall.S	2006-07-19 18:58:18.000000000 +0000
+++ powerpc.work/arch/powerpc/platforms/pseries/hvCall.S	2006-07-21 07:06:49.000000000 +0000
@@ -11,7 +11,57 @@
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 	
-#define STK_PARM(i)     (48 + ((i)-3)*8)
+#define STK_PARM(i)     (STACKFRAMESIZE + 48 + ((i)-3)*8)
+#define STK_REG(i)      (112 + ((i)-14)*8)
+
+#ifdef CONFIG_HCALL_STATS
+#define STACKFRAMESIZE  256
+#define HCALL_INST_PRECALL					\
+	/* use stack frame to save a few non-volital regs */	\
+	stdu    r1,-STACKFRAMESIZE(r1);				\
+	std     r31,STK_REG(r31)(r1);				\
+	std     r30,STK_REG(r30)(r1);				\
+	std     r29,STK_REG(r29)(r1);				\
+	std     r28,STK_REG(r28)(r1);				\
+								\
+	/* save lr and hcall opcode */				\
+	/* then get time, purr snapshot before hcall */		\
+	mflr	r31;						\
+	mr	r30,r3;						\
+	mftb	r29;						\
+BEGIN_FTR_SECTION;						\
+	mfspr	r28,SPRN_PURR;					\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);
+
+#define HCALL_INST_POSTCALL					\
+	/* get time, purr snapshot after hcall */		\
+	mftb	r4;						\
+BEGIN_FTR_SECTION;						\
+	mfspr	r5,SPRN_PURR;					\
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+								\
+	/* setup regs to call routine that stuffs stats */	\
+	/* into per-cpu/per-call structure.		*/	\
+	subf	r4,r29,r4;					\
+	subf	r5,r28,r5;					\
+	mr	r29,r3;		/* save hcall rc  */		\
+	mr	r3,r30;						\
+	bl	.update_hcall_stats;				\
+								\
+	/* restore hcall rc, lr and non-volital regs */		\
+	mr	r3,r29;						\
+	mtlr	r31;						\
+	ld      r31,STK_REG(r31)(r1);				\
+	ld      r30,STK_REG(r30)(r1);				\
+	ld      r29,STK_REG(r29)(r1);				\
+	ld      r28,STK_REG(r28)(r1);				\
+	addi    r1,r1,STACKFRAMESIZE
+#else
+
+#define STACKFRAMESIZE	0
+#define HCALL_INST_PRECALL	nop
+#define HCALL_INST_POSTCALL	nop
+#endif
 
 	.text
 
@@ -21,8 +71,12 @@ _GLOBAL(plpar_hcall_norets)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	HVSC				/* invoke the hypervisor */
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 	blr				/* return r3 = status */
@@ -33,6 +87,8 @@ _GLOBAL(plpar_hcall)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
 	mr	r4,r5
@@ -50,6 +106,8 @@ _GLOBAL(plpar_hcall)
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 
@@ -61,6 +119,8 @@ _GLOBAL(plpar_hcall9)
 	mfcr	r0
 	stw	r0,8(r1)
 
+	HCALL_INST_PRECALL
+
 	std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
 	mr	r4,r5
@@ -86,6 +146,8 @@ _GLOBAL(plpar_hcall9)
 	std	r11,56(r12)
 	std	r12,64(r12)
 
+	HCALL_INST_POSTCALL
+
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC] asm code for Hypervisor Call Instrumentation
  2006-08-02 17:59 [RFC] asm code for Hypervisor Call Instrumentation Mike Kravetz
@ 2006-08-03  3:57 ` Stephen Rothwell
  2006-08-03  4:05   ` Mike Kravetz
  2006-08-07  6:26 ` Paul Mackerras
  1 sibling, 1 reply; 8+ messages in thread
From: Stephen Rothwell @ 2006-08-03  3:57 UTC (permalink / raw)
  To: Mike Kravetz; +Cc: linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 354 bytes --]

Hi Mike,

On Wed, 2 Aug 2006 10:59:47 -0700 Mike Kravetz <kravetz@us.ibm.com> wrote:
>
> +#else
> +
> +#define STACKFRAMESIZE	0
> +#define HCALL_INST_PRECALL	nop
> +#define HCALL_INST_POSTCALL	nop

Why even "nop", why not just blank?

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC] asm code for Hypervisor Call Instrumentation
  2006-08-03  3:57 ` Stephen Rothwell
@ 2006-08-03  4:05   ` Mike Kravetz
  2006-08-03 17:40     ` Mike Kravetz
  0 siblings, 1 reply; 8+ messages in thread
From: Mike Kravetz @ 2006-08-03  4:05 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: linuxppc-dev

On Thu, Aug 03, 2006 at 01:57:17PM +1000, Stephen Rothwell wrote:
> Hi Mike,
> 
> On Wed, 2 Aug 2006 10:59:47 -0700 Mike Kravetz <kravetz@us.ibm.com> wrote:
> >
> > +#else
> > +
> > +#define STACKFRAMESIZE	0
> > +#define HCALL_INST_PRECALL	nop
> > +#define HCALL_INST_POSTCALL	nop
> 
> Why even "nop", why not just blank?

At first I was concerned about macro expansion and knew a 'nop'
would be ok.  Intended to go back and try 'blank', but forgot.
I'll give it a try.

-- 
Mike

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC] asm code for Hypervisor Call Instrumentation
  2006-08-03  4:05   ` Mike Kravetz
@ 2006-08-03 17:40     ` Mike Kravetz
  2006-08-03 23:25       ` Stephen Rothwell
  0 siblings, 1 reply; 8+ messages in thread
From: Mike Kravetz @ 2006-08-03 17:40 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: linuxppc-dev

On Wed, Aug 02, 2006 at 09:05:06PM -0700, Mike Kravetz wrote:
> On Thu, Aug 03, 2006 at 01:57:17PM +1000, Stephen Rothwell wrote:
> > On Wed, 2 Aug 2006 10:59:47 -0700 Mike Kravetz <kravetz@us.ibm.com> wrote:
> > > +#else
> > > +
> > > +#define STACKFRAMESIZE	0
> > > +#define HCALL_INST_PRECALL	nop
> > > +#define HCALL_INST_POSTCALL	nop
> > 
> > Why even "nop", why not just blank?
> 
> At first I was concerned about macro expansion and knew a 'nop'
> would be ok.  Intended to go back and try 'blank', but forgot.
> I'll give it a try.

What is the preferred method to 'leave it blank'?

#define HCALL_INST_PRECALL	/* nop */
#define HCALL_INST_PRECALL	;
something else?

Leaving HCALL_INST_PRECALL undefined results in an assembler error.

-- 
Mike

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC] asm code for Hypervisor Call Instrumentation
  2006-08-03 17:40     ` Mike Kravetz
@ 2006-08-03 23:25       ` Stephen Rothwell
  2006-08-04  0:08         ` Mike Kravetz
  0 siblings, 1 reply; 8+ messages in thread
From: Stephen Rothwell @ 2006-08-03 23:25 UTC (permalink / raw)
  To: Mike Kravetz; +Cc: linuxppc-dev

Hi Mike,

On Thu, 3 Aug 2006 10:40:58 -0700 Mike Kravetz <kravetz@us.ibm.com> wrote:
>
> What is the preferred method to 'leave it blank'?
> 
> #define HCALL_INST_PRECALL	/* nop */
> #define HCALL_INST_PRECALL	;
> something else?

I would have thought that

#define HCALL_INST_PRECALL

would work.

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC] asm code for Hypervisor Call Instrumentation
  2006-08-03 23:25       ` Stephen Rothwell
@ 2006-08-04  0:08         ` Mike Kravetz
  0 siblings, 0 replies; 8+ messages in thread
From: Mike Kravetz @ 2006-08-04  0:08 UTC (permalink / raw)
  To: Stephen Rothwell; +Cc: linuxppc-dev

On Fri, Aug 04, 2006 at 09:25:52AM +1000, Stephen Rothwell wrote:
> I would have thought that
> 
> #define HCALL_INST_PRECALL
> 
> would work.

It does work.  That was too obvious for me. :)

-- 
Mike

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC] asm code for Hypervisor Call Instrumentation
  2006-08-02 17:59 [RFC] asm code for Hypervisor Call Instrumentation Mike Kravetz
  2006-08-03  3:57 ` Stephen Rothwell
@ 2006-08-07  6:26 ` Paul Mackerras
  2006-08-11 18:30   ` Mike Kravetz
  1 sibling, 1 reply; 8+ messages in thread
From: Paul Mackerras @ 2006-08-07  6:26 UTC (permalink / raw)
  To: Mike Kravetz; +Cc: linuxppc-dev

Mike Kravetz writes:

> This patch is built on top of Anton's hcall cleanup patch.  One
> remaining issue is 'where should the statistic data structures
> be updated?'.  For simplicity, I have the asm code call the
> following C routine to perform the updates.

Hmmm, doing the update in assembly would avoid the need to create a
stack frame, which would be nice...  Maybe we need to add some macros
to include/asm-powerpc/percpu.h to make it easier to access per-cpu
variables from assembly code.

Alternatively, we could put a pointer to the hcall_stats array for
each cpu in its paca.  That's very easily accessed from assembly code.

Paul.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC] asm code for Hypervisor Call Instrumentation
  2006-08-07  6:26 ` Paul Mackerras
@ 2006-08-11 18:30   ` Mike Kravetz
  0 siblings, 0 replies; 8+ messages in thread
From: Mike Kravetz @ 2006-08-11 18:30 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev

On Mon, Aug 07, 2006 at 04:26:24PM +1000, Paul Mackerras wrote:
> Hmmm, doing the update in assembly would avoid the need to create a
> stack frame, which would be nice...  Maybe we need to add some macros
> to include/asm-powerpc/percpu.h to make it easier to access per-cpu
> variables from assembly code.
> 
> Alternatively, we could put a pointer to the hcall_stats array for
> each cpu in its paca.  That's very easily accessed from assembly code.

I finally got around to doing the update in assembly.  I have attached
the code in question below.  Macros were added (elsewhere) for things like 
stat structure and offsets within the structure.  No macros for per-cpu
data.  Rather, I just used some existing definitions and based code on
descriptions in asm-powerpc/percpu.h.  Let me know if this introduces too
many (unchecked at compile time assumptions) into the code.

The many comments are mostly for my benefit. :)

Thanks,
-- 
Mike

#define STK_PARM(i)     (48 + ((i)-3)*8)

#ifdef CONFIG_HCALL_STATS
/*
 * precall must preserve all registers.  use unused STK_PARM()
 * areas to save snapshots and opcode.
 */
#define HCALL_INST_PRECALL					\
	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
	mftb	r3;			/* get timebase and */	\
	std     r3,STK_PARM(r5)(r1);	/* save for later */	\
BEGIN_FTR_SECTION;						\
	mfspr	r3,SPRN_PURR;		/* get PURR and */	\
END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
	std	r3,STK_PARM(r6)(r1);	/* save for later */	\
	ld	r3,STK_PARM(r3)(r1);	/* opcode back in r3 */
	
/*
 * postcall is performed immediately before function return which
 * allows liberal use of non-volital registers.
 */
#define HCALL_INST_POSTCALL					\
	/* get time and PURR snapshots after hcall */		\
	mftb	r7;			/* timebase after */	\
BEGIN_FTR_SECTION;						\
	mfspr	r8,SPRN_PURR;		/* PURR after */	\
END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
								\
	/* calculate time and PURR deltas for call */		\
	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
	subf	r5,r5,r7;					\
	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
	subf	r6,r6,r8;					\
								\
	/* calculate address of stat structure */		\
	ld	r4,STK_PARM(r3)(r1);	/* use opcode as */	\
	rldicl	r4,r4,62,2;		/* index into array */	\
	mulli	r4,r4,HCALL_STAT_SIZE;				\
	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
	add	r4,r4,r7;					\
	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
	add	r4,r4,r7;					\
								\
	/* update stats	*/					\
	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
	addi	r7,r7,1;					\
	std	r7,HCALL_STAT_CALLS(r4);			\
	ld      r7,HCALL_STAT_TB(r4);	/* timebase */		\
	add	r7,r7,r5;					\
	std	r7,HCALL_STAT_TB(r4);				\
	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
	add	r7,r7,r6;					\
	std	r7,HCALL_STAT_PURR(r4);
#else

#define HCALL_INST_PRECALL
#define HCALL_INST_POSTCALL
#endif

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2006-08-11 18:29 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-08-02 17:59 [RFC] asm code for Hypervisor Call Instrumentation Mike Kravetz
2006-08-03  3:57 ` Stephen Rothwell
2006-08-03  4:05   ` Mike Kravetz
2006-08-03 17:40     ` Mike Kravetz
2006-08-03 23:25       ` Stephen Rothwell
2006-08-04  0:08         ` Mike Kravetz
2006-08-07  6:26 ` Paul Mackerras
2006-08-11 18:30   ` Mike Kravetz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).