linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y
@ 2007-03-22  2:09 Olof Johansson
  2007-03-22  2:10 ` Stephen Rothwell
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Olof Johansson @ 2007-03-22  2:09 UTC (permalink / raw)
  To: paulus; +Cc: linuxppc-dev, anton

mtocrf is a faster single-entry mtcrf available in PPC 2.00 and later
processors. It can make quite a difference in performance on some
implementations, so use it for CONFIG_POWER4_ONLY=y builds.


Signed-off-by: Olof Johansson <olof@lixom.net>

Index: linux-2.6/arch/powerpc/lib/mem_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/mem_64.S
+++ linux-2.6/arch/powerpc/lib/mem_64.S
@@ -19,7 +19,7 @@ _GLOBAL(memset)
 	rlwimi	r4,r4,16,0,15
 	cmplw	cr1,r5,r0		/* do we get that far? */
 	rldimi	r4,r4,32,0
-	mtcrf	1,r0
+	PPC_MTOCRF	1,r0
 	mr	r6,r3
 	blt	cr1,8f
 	beq+	3f			/* if already 8-byte aligned */
@@ -49,7 +49,7 @@ _GLOBAL(memset)
 	bdnz	4b
 5:	srwi.	r0,r5,3
 	clrlwi	r5,r5,29
-	mtcrf	1,r0
+	PPC_MTOCRF	1,r0
 	beq	8f
 	bf	29,6f
 	std	r4,0(r6)
@@ -65,7 +65,7 @@ _GLOBAL(memset)
 	std	r4,0(r6)
 	addi	r6,r6,8
 8:	cmpwi	r5,0
-	mtcrf	1,r5
+	PPC_MTOCRF	1,r5
 	beqlr+
 	bf	29,9f
 	stw	r4,0(r6)
Index: linux-2.6/arch/powerpc/lib/copyuser_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/copyuser_64.S
+++ linux-2.6/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
 	dcbt	0,r4
 	beq	.Lcopy_page_4K
 	andi.	r6,r6,7
-	mtcrf	0x01,r5
+	PPC_MTOCRF	0x01,r5
 	blt	cr1,.Lshort_copy
 	bne	.Ldst_unaligned
 .Ldst_aligned:
@@ -135,7 +135,7 @@ _GLOBAL(__copy_tofrom_user)
 	b	.Ldo_tail
 
 .Ldst_unaligned:
-	mtcrf	0x01,r6		/* put #bytes to 8B bdry into cr7 */
+	PPC_MTOCRF	0x01,r6		/* put #bytes to 8B bdry into cr7 */
 	subf	r5,r6,r5
 	li	r7,0
 	cmpldi	r1,r5,16
@@ -150,7 +150,7 @@ _GLOBAL(__copy_tofrom_user)
 2:	bf	cr7*4+1,3f
 37:	lwzx	r0,r7,r4
 83:	stwx	r0,r7,r3
-3:	mtcrf	0x01,r5
+3:	PPC_MTOCRF	0x01,r5
 	add	r4,r6,r4
 	add	r3,r6,r3
 	b	.Ldst_aligned
Index: linux-2.6/arch/powerpc/lib/memcpy_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/memcpy_64.S
+++ linux-2.6/arch/powerpc/lib/memcpy_64.S
@@ -12,7 +12,7 @@
 	.align	7
 _GLOBAL(memcpy)
 	std	r3,48(r1)	/* save destination pointer for return value */
-	mtcrf	0x01,r5
+	PPC_MTOCRF	0x01,r5
 	cmpldi	cr1,r5,16
 	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
 	andi.	r6,r6,7
@@ -128,7 +128,7 @@ _GLOBAL(memcpy)
 	b	.Ldo_tail
 
 .Ldst_unaligned:
-	mtcrf	0x01,r6		# put #bytes to 8B bdry into cr7
+	PPC_MTOCRF	0x01,r6		# put #bytes to 8B bdry into cr7
 	subf	r5,r6,r5
 	li	r7,0
 	cmpldi	r1,r5,16
@@ -143,7 +143,7 @@ _GLOBAL(memcpy)
 2:	bf	cr7*4+1,3f
 	lwzx	r0,r7,r4
 	stwx	r0,r7,r3
-3:	mtcrf	0x01,r5
+3:	PPC_MTOCRF	0x01,r5
 	add	r4,r6,r4
 	add	r3,r6,r3
 	b	.Ldst_aligned
Index: linux-2.6/include/asm-powerpc/asm-compat.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/asm-compat.h
+++ linux-2.6/include/asm-powerpc/asm-compat.h
@@ -78,6 +78,15 @@
 #define PPC_STLCX	stringify_in_c(stdcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzd)
 
+/* Move to CR, single-entry optimized version. Only available
+ * on POWER4 and later.
+ */
+#ifdef CONFIG_POWER4_ONLY
+#define PPC_MTOCRF      stringify_in_c(mtocrf)
+#else
+#define PPC_MTOCRF      stringify_in_c(mtcrf)
+#endif
+
 #else /* 32-bit */
 
 /* operations for longs and pointers */
@@ -89,6 +100,7 @@
 #define PPC_LLARX	stringify_in_c(lwarx)
 #define PPC_STLCX	stringify_in_c(stwcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzw)
+#define PPC_MTOCRF      stringify_in_c(mtcrf)
 
 #endif
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y
  2007-03-22  2:09 [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y Olof Johansson
@ 2007-03-22  2:10 ` Stephen Rothwell
  2007-03-22 11:24 ` Segher Boessenkool
  2007-03-22 14:34 ` [PATCH] [v2] powerpc: " Olof Johansson
  2 siblings, 0 replies; 5+ messages in thread
From: Stephen Rothwell @ 2007-03-22  2:10 UTC (permalink / raw)
  To: Olof Johansson; +Cc: linuxppc-dev, paulus, anton

[-- Attachment #1: Type: text/plain, Size: 840 bytes --]

On Wed, 21 Mar 2007 21:09:26 -0500 olof@lixom.net (Olof Johansson) wrote:
>
> +/* Move to CR, single-entry optimized version. Only available
> + * on POWER4 and later.
> + */
> +#ifdef CONFIG_POWER4_ONLY
> +#define PPC_MTOCRF      stringify_in_c(mtocrf)
                     ^^^^^^
Please use a TAB character here.

> +#else
> +#define PPC_MTOCRF      stringify_in_c(mtcrf)
                     ^^^^^^
and here.

> +#endif
> +
>  #else /* 32-bit */
>
>  /* operations for longs and pointers */
> @@ -89,6 +100,7 @@
>  #define PPC_LLARX	stringify_in_c(lwarx)
>  #define PPC_STLCX	stringify_in_c(stwcx.)
>  #define PPC_CNTLZL	stringify_in_c(cntlzw)
> +#define PPC_MTOCRF      stringify_in_c(mtcrf)
                     ^^^^^^
and here :-)

--
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y
  2007-03-22  2:09 [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y Olof Johansson
  2007-03-22  2:10 ` Stephen Rothwell
@ 2007-03-22 11:24 ` Segher Boessenkool
  2007-03-22 11:50   ` Paul Mackerras
  2007-03-22 14:34 ` [PATCH] [v2] powerpc: " Olof Johansson
  2 siblings, 1 reply; 5+ messages in thread
From: Segher Boessenkool @ 2007-03-22 11:24 UTC (permalink / raw)
  To: Olof Johansson; +Cc: linuxppc-dev, paulus, anton

> mtocrf is a faster single-entry mtcrf available in PPC 2.00 and later
> processors. It can make quite a difference in performance on some
> implementations, so use it for CONFIG_POWER4_ONLY=y builds.

Does anyone know which CPUs that don't implement mtocrf
don't treat it identical to mtcrf?  That would allow to
widen when we use mtocrf.  Or perhaps it's really time
for an "asm alternatives" run-time patch thing now :-)

Patch looks good as-is but I can always ask for more...


Segher

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y
  2007-03-22 11:24 ` Segher Boessenkool
@ 2007-03-22 11:50   ` Paul Mackerras
  0 siblings, 0 replies; 5+ messages in thread
From: Paul Mackerras @ 2007-03-22 11:50 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: Olof Johansson, linuxppc-dev, anton

Segher Boessenkool writes:

> Does anyone know which CPUs that don't implement mtocrf
> don't treat it identical to mtcrf?  That would allow to
> widen when we use mtocrf.  Or perhaps it's really time
> for an "asm alternatives" run-time patch thing now :-)

POWER3 takes a 0x700 interrupt when it sees mtocrf, unfortunately.

Paul.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] [v2] powerpc: Use mtocrf in asm when CONFIG_POWER4_ONLY=y
  2007-03-22  2:09 [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y Olof Johansson
  2007-03-22  2:10 ` Stephen Rothwell
  2007-03-22 11:24 ` Segher Boessenkool
@ 2007-03-22 14:34 ` Olof Johansson
  2 siblings, 0 replies; 5+ messages in thread
From: Olof Johansson @ 2007-03-22 14:34 UTC (permalink / raw)
  To: paulus; +Cc: linuxppc-dev, anton, sfr

mtocrf is a faster single-entry mtcrf available in POWER4 and later
processors. It can make quite a difference in performance on some
implementations, so use it for CONFIG_POWER4_ONLY builds.


Signed-off-by: Olof Johansson <olof@lixom.net>


---

Changed: Fixed space/tab mixup as noted by Stephen Rothwell.

Index: linux-2.6/arch/powerpc/lib/mem_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/mem_64.S
+++ linux-2.6/arch/powerpc/lib/mem_64.S
@@ -19,7 +19,7 @@ _GLOBAL(memset)
 	rlwimi	r4,r4,16,0,15
 	cmplw	cr1,r5,r0		/* do we get that far? */
 	rldimi	r4,r4,32,0
-	mtcrf	1,r0
+	PPC_MTOCRF	1,r0
 	mr	r6,r3
 	blt	cr1,8f
 	beq+	3f			/* if already 8-byte aligned */
@@ -49,7 +49,7 @@ _GLOBAL(memset)
 	bdnz	4b
 5:	srwi.	r0,r5,3
 	clrlwi	r5,r5,29
-	mtcrf	1,r0
+	PPC_MTOCRF	1,r0
 	beq	8f
 	bf	29,6f
 	std	r4,0(r6)
@@ -65,7 +65,7 @@ _GLOBAL(memset)
 	std	r4,0(r6)
 	addi	r6,r6,8
 8:	cmpwi	r5,0
-	mtcrf	1,r5
+	PPC_MTOCRF	1,r5
 	beqlr+
 	bf	29,9f
 	stw	r4,0(r6)
Index: linux-2.6/arch/powerpc/lib/copyuser_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/copyuser_64.S
+++ linux-2.6/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
 	dcbt	0,r4
 	beq	.Lcopy_page_4K
 	andi.	r6,r6,7
-	mtcrf	0x01,r5
+	PPC_MTOCRF	0x01,r5
 	blt	cr1,.Lshort_copy
 	bne	.Ldst_unaligned
 .Ldst_aligned:
@@ -135,7 +135,7 @@ _GLOBAL(__copy_tofrom_user)
 	b	.Ldo_tail
 
 .Ldst_unaligned:
-	mtcrf	0x01,r6		/* put #bytes to 8B bdry into cr7 */
+	PPC_MTOCRF	0x01,r6		/* put #bytes to 8B bdry into cr7 */
 	subf	r5,r6,r5
 	li	r7,0
 	cmpldi	r1,r5,16
@@ -150,7 +150,7 @@ _GLOBAL(__copy_tofrom_user)
 2:	bf	cr7*4+1,3f
 37:	lwzx	r0,r7,r4
 83:	stwx	r0,r7,r3
-3:	mtcrf	0x01,r5
+3:	PPC_MTOCRF	0x01,r5
 	add	r4,r6,r4
 	add	r3,r6,r3
 	b	.Ldst_aligned
Index: linux-2.6/arch/powerpc/lib/memcpy_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/memcpy_64.S
+++ linux-2.6/arch/powerpc/lib/memcpy_64.S
@@ -12,7 +12,7 @@
 	.align	7
 _GLOBAL(memcpy)
 	std	r3,48(r1)	/* save destination pointer for return value */
-	mtcrf	0x01,r5
+	PPC_MTOCRF	0x01,r5
 	cmpldi	cr1,r5,16
 	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
 	andi.	r6,r6,7
@@ -128,7 +128,7 @@ _GLOBAL(memcpy)
 	b	.Ldo_tail
 
 .Ldst_unaligned:
-	mtcrf	0x01,r6		# put #bytes to 8B bdry into cr7
+	PPC_MTOCRF	0x01,r6		# put #bytes to 8B bdry into cr7
 	subf	r5,r6,r5
 	li	r7,0
 	cmpldi	r1,r5,16
@@ -143,7 +143,7 @@ _GLOBAL(memcpy)
 2:	bf	cr7*4+1,3f
 	lwzx	r0,r7,r4
 	stwx	r0,r7,r3
-3:	mtcrf	0x01,r5
+3:	PPC_MTOCRF	0x01,r5
 	add	r4,r6,r4
 	add	r3,r6,r3
 	b	.Ldst_aligned
Index: linux-2.6/include/asm-powerpc/asm-compat.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/asm-compat.h
+++ linux-2.6/include/asm-powerpc/asm-compat.h
@@ -78,6 +78,15 @@
 #define PPC_STLCX	stringify_in_c(stdcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzd)
 
+/* Move to CR, single-entry optimized version. Only available
+ * on POWER4 and later.
+ */
+#ifdef CONFIG_POWER4_ONLY
+#define PPC_MTOCRF	stringify_in_c(mtocrf)
+#else
+#define PPC_MTOCRF	stringify_in_c(mtcrf)
+#endif
+
 #else /* 32-bit */
 
 /* operations for longs and pointers */
@@ -89,6 +100,7 @@
 #define PPC_LLARX	stringify_in_c(lwarx)
 #define PPC_STLCX	stringify_in_c(stwcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzw)
+#define PPC_MTOCRF	stringify_in_c(mtcrf)
 
 #endif
 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2007-03-22 14:19 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-03-22  2:09 [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y Olof Johansson
2007-03-22  2:10 ` Stephen Rothwell
2007-03-22 11:24 ` Segher Boessenkool
2007-03-22 11:50   ` Paul Mackerras
2007-03-22 14:34 ` [PATCH] [v2] powerpc: " Olof Johansson

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).