* [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y
@ 2007-03-22 2:09 Olof Johansson
2007-03-22 2:10 ` Stephen Rothwell
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Olof Johansson @ 2007-03-22 2:09 UTC (permalink / raw)
To: paulus; +Cc: linuxppc-dev, anton
mtocrf is a faster single-entry mtcrf available in PPC 2.00 and later
processors. It can make quite a difference in performance on some
implementations, so use it for CONFIG_POWER4_ONLY=y builds.
Signed-off-by: Olof Johansson <olof@lixom.net>
Index: linux-2.6/arch/powerpc/lib/mem_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/mem_64.S
+++ linux-2.6/arch/powerpc/lib/mem_64.S
@@ -19,7 +19,7 @@ _GLOBAL(memset)
rlwimi r4,r4,16,0,15
cmplw cr1,r5,r0 /* do we get that far? */
rldimi r4,r4,32,0
- mtcrf 1,r0
+ PPC_MTOCRF 1,r0
mr r6,r3
blt cr1,8f
beq+ 3f /* if already 8-byte aligned */
@@ -49,7 +49,7 @@ _GLOBAL(memset)
bdnz 4b
5: srwi. r0,r5,3
clrlwi r5,r5,29
- mtcrf 1,r0
+ PPC_MTOCRF 1,r0
beq 8f
bf 29,6f
std r4,0(r6)
@@ -65,7 +65,7 @@ _GLOBAL(memset)
std r4,0(r6)
addi r6,r6,8
8: cmpwi r5,0
- mtcrf 1,r5
+ PPC_MTOCRF 1,r5
beqlr+
bf 29,9f
stw r4,0(r6)
Index: linux-2.6/arch/powerpc/lib/copyuser_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/copyuser_64.S
+++ linux-2.6/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
dcbt 0,r4
beq .Lcopy_page_4K
andi. r6,r6,7
- mtcrf 0x01,r5
+ PPC_MTOCRF 0x01,r5
blt cr1,.Lshort_copy
bne .Ldst_unaligned
.Ldst_aligned:
@@ -135,7 +135,7 @@ _GLOBAL(__copy_tofrom_user)
b .Ldo_tail
.Ldst_unaligned:
- mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */
+ PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */
subf r5,r6,r5
li r7,0
cmpldi r1,r5,16
@@ -150,7 +150,7 @@ _GLOBAL(__copy_tofrom_user)
2: bf cr7*4+1,3f
37: lwzx r0,r7,r4
83: stwx r0,r7,r3
-3: mtcrf 0x01,r5
+3: PPC_MTOCRF 0x01,r5
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
Index: linux-2.6/arch/powerpc/lib/memcpy_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/memcpy_64.S
+++ linux-2.6/arch/powerpc/lib/memcpy_64.S
@@ -12,7 +12,7 @@
.align 7
_GLOBAL(memcpy)
std r3,48(r1) /* save destination pointer for return value */
- mtcrf 0x01,r5
+ PPC_MTOCRF 0x01,r5
cmpldi cr1,r5,16
neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
andi. r6,r6,7
@@ -128,7 +128,7 @@ _GLOBAL(memcpy)
b .Ldo_tail
.Ldst_unaligned:
- mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7
+ PPC_MTOCRF 0x01,r6 # put #bytes to 8B bdry into cr7
subf r5,r6,r5
li r7,0
cmpldi r1,r5,16
@@ -143,7 +143,7 @@ _GLOBAL(memcpy)
2: bf cr7*4+1,3f
lwzx r0,r7,r4
stwx r0,r7,r3
-3: mtcrf 0x01,r5
+3: PPC_MTOCRF 0x01,r5
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
Index: linux-2.6/include/asm-powerpc/asm-compat.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/asm-compat.h
+++ linux-2.6/include/asm-powerpc/asm-compat.h
@@ -78,6 +78,15 @@
#define PPC_STLCX stringify_in_c(stdcx.)
#define PPC_CNTLZL stringify_in_c(cntlzd)
+/* Move to CR, single-entry optimized version. Only available
+ * on POWER4 and later.
+ */
+#ifdef CONFIG_POWER4_ONLY
+#define PPC_MTOCRF stringify_in_c(mtocrf)
+#else
+#define PPC_MTOCRF stringify_in_c(mtcrf)
+#endif
+
#else /* 32-bit */
/* operations for longs and pointers */
@@ -89,6 +100,7 @@
#define PPC_LLARX stringify_in_c(lwarx)
#define PPC_STLCX stringify_in_c(stwcx.)
#define PPC_CNTLZL stringify_in_c(cntlzw)
+#define PPC_MTOCRF stringify_in_c(mtcrf)
#endif
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y
2007-03-22 2:09 [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y Olof Johansson
@ 2007-03-22 2:10 ` Stephen Rothwell
2007-03-22 11:24 ` Segher Boessenkool
2007-03-22 14:34 ` [PATCH] [v2] powerpc: " Olof Johansson
2 siblings, 0 replies; 5+ messages in thread
From: Stephen Rothwell @ 2007-03-22 2:10 UTC (permalink / raw)
To: Olof Johansson; +Cc: linuxppc-dev, paulus, anton
[-- Attachment #1: Type: text/plain, Size: 840 bytes --]
On Wed, 21 Mar 2007 21:09:26 -0500 olof@lixom.net (Olof Johansson) wrote:
>
> +/* Move to CR, single-entry optimized version. Only available
> + * on POWER4 and later.
> + */
> +#ifdef CONFIG_POWER4_ONLY
> +#define PPC_MTOCRF stringify_in_c(mtocrf)
^^^^^^
Please use a TAB character here.
> +#else
> +#define PPC_MTOCRF stringify_in_c(mtcrf)
^^^^^^
and here.
> +#endif
> +
> #else /* 32-bit */
>
> /* operations for longs and pointers */
> @@ -89,6 +100,7 @@
> #define PPC_LLARX stringify_in_c(lwarx)
> #define PPC_STLCX stringify_in_c(stwcx.)
> #define PPC_CNTLZL stringify_in_c(cntlzw)
> +#define PPC_MTOCRF stringify_in_c(mtcrf)
^^^^^^
and here :-)
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y
2007-03-22 2:09 [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y Olof Johansson
2007-03-22 2:10 ` Stephen Rothwell
@ 2007-03-22 11:24 ` Segher Boessenkool
2007-03-22 11:50 ` Paul Mackerras
2007-03-22 14:34 ` [PATCH] [v2] powerpc: " Olof Johansson
2 siblings, 1 reply; 5+ messages in thread
From: Segher Boessenkool @ 2007-03-22 11:24 UTC (permalink / raw)
To: Olof Johansson; +Cc: linuxppc-dev, paulus, anton
> mtocrf is a faster single-entry mtcrf available in PPC 2.00 and later
> processors. It can make quite a difference in performance on some
> implementations, so use it for CONFIG_POWER4_ONLY=y builds.
Does anyone know which CPUs that don't implement mtocrf
also don't treat it identically to mtcrf? Knowing that would
allow us to widen the cases where we use mtocrf. Or perhaps
it's really time for an "asm alternatives" run-time patching
mechanism now :-)
Patch looks good as-is but I can always ask for more...
Segher
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y
2007-03-22 11:24 ` Segher Boessenkool
@ 2007-03-22 11:50 ` Paul Mackerras
0 siblings, 0 replies; 5+ messages in thread
From: Paul Mackerras @ 2007-03-22 11:50 UTC (permalink / raw)
To: Segher Boessenkool; +Cc: Olof Johansson, linuxppc-dev, anton
Segher Boessenkool writes:
> Does anyone know which CPUs that don't implement mtocrf
> also don't treat it identically to mtcrf? Knowing that would
> allow us to widen the cases where we use mtocrf. Or perhaps
> it's really time for an "asm alternatives" run-time patching
> mechanism now :-)
POWER3 takes a 0x700 interrupt when it sees mtocrf, unfortunately.
Paul.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] [v2] powerpc: Use mtocrf in asm when CONFIG_POWER4_ONLY=y
2007-03-22 2:09 [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y Olof Johansson
2007-03-22 2:10 ` Stephen Rothwell
2007-03-22 11:24 ` Segher Boessenkool
@ 2007-03-22 14:34 ` Olof Johansson
2 siblings, 0 replies; 5+ messages in thread
From: Olof Johansson @ 2007-03-22 14:34 UTC (permalink / raw)
To: paulus; +Cc: linuxppc-dev, anton, sfr
mtocrf is a faster single-entry mtcrf available in POWER4 and later
processors. It can make quite a difference in performance on some
implementations, so use it for CONFIG_POWER4_ONLY builds.
Signed-off-by: Olof Johansson <olof@lixom.net>
---
Changed: Fixed space/tab mixup as noted by Stephen Rothwell.
Index: linux-2.6/arch/powerpc/lib/mem_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/mem_64.S
+++ linux-2.6/arch/powerpc/lib/mem_64.S
@@ -19,7 +19,7 @@ _GLOBAL(memset)
rlwimi r4,r4,16,0,15
cmplw cr1,r5,r0 /* do we get that far? */
rldimi r4,r4,32,0
- mtcrf 1,r0
+ PPC_MTOCRF 1,r0
mr r6,r3
blt cr1,8f
beq+ 3f /* if already 8-byte aligned */
@@ -49,7 +49,7 @@ _GLOBAL(memset)
bdnz 4b
5: srwi. r0,r5,3
clrlwi r5,r5,29
- mtcrf 1,r0
+ PPC_MTOCRF 1,r0
beq 8f
bf 29,6f
std r4,0(r6)
@@ -65,7 +65,7 @@ _GLOBAL(memset)
std r4,0(r6)
addi r6,r6,8
8: cmpwi r5,0
- mtcrf 1,r5
+ PPC_MTOCRF 1,r5
beqlr+
bf 29,9f
stw r4,0(r6)
Index: linux-2.6/arch/powerpc/lib/copyuser_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/copyuser_64.S
+++ linux-2.6/arch/powerpc/lib/copyuser_64.S
@@ -24,7 +24,7 @@ _GLOBAL(__copy_tofrom_user)
dcbt 0,r4
beq .Lcopy_page_4K
andi. r6,r6,7
- mtcrf 0x01,r5
+ PPC_MTOCRF 0x01,r5
blt cr1,.Lshort_copy
bne .Ldst_unaligned
.Ldst_aligned:
@@ -135,7 +135,7 @@ _GLOBAL(__copy_tofrom_user)
b .Ldo_tail
.Ldst_unaligned:
- mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */
+ PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */
subf r5,r6,r5
li r7,0
cmpldi r1,r5,16
@@ -150,7 +150,7 @@ _GLOBAL(__copy_tofrom_user)
2: bf cr7*4+1,3f
37: lwzx r0,r7,r4
83: stwx r0,r7,r3
-3: mtcrf 0x01,r5
+3: PPC_MTOCRF 0x01,r5
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
Index: linux-2.6/arch/powerpc/lib/memcpy_64.S
===================================================================
--- linux-2.6.orig/arch/powerpc/lib/memcpy_64.S
+++ linux-2.6/arch/powerpc/lib/memcpy_64.S
@@ -12,7 +12,7 @@
.align 7
_GLOBAL(memcpy)
std r3,48(r1) /* save destination pointer for return value */
- mtcrf 0x01,r5
+ PPC_MTOCRF 0x01,r5
cmpldi cr1,r5,16
neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
andi. r6,r6,7
@@ -128,7 +128,7 @@ _GLOBAL(memcpy)
b .Ldo_tail
.Ldst_unaligned:
- mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7
+ PPC_MTOCRF 0x01,r6 # put #bytes to 8B bdry into cr7
subf r5,r6,r5
li r7,0
cmpldi r1,r5,16
@@ -143,7 +143,7 @@ _GLOBAL(memcpy)
2: bf cr7*4+1,3f
lwzx r0,r7,r4
stwx r0,r7,r3
-3: mtcrf 0x01,r5
+3: PPC_MTOCRF 0x01,r5
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
Index: linux-2.6/include/asm-powerpc/asm-compat.h
===================================================================
--- linux-2.6.orig/include/asm-powerpc/asm-compat.h
+++ linux-2.6/include/asm-powerpc/asm-compat.h
@@ -78,6 +78,15 @@
#define PPC_STLCX stringify_in_c(stdcx.)
#define PPC_CNTLZL stringify_in_c(cntlzd)
+/* Move to CR, single-entry optimized version. Only available
+ * on POWER4 and later.
+ */
+#ifdef CONFIG_POWER4_ONLY
+#define PPC_MTOCRF stringify_in_c(mtocrf)
+#else
+#define PPC_MTOCRF stringify_in_c(mtcrf)
+#endif
+
#else /* 32-bit */
/* operations for longs and pointers */
@@ -89,6 +100,7 @@
#define PPC_LLARX stringify_in_c(lwarx)
#define PPC_STLCX stringify_in_c(stwcx.)
#define PPC_CNTLZL stringify_in_c(cntlzw)
+#define PPC_MTOCRF stringify_in_c(mtcrf)
#endif
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2007-03-22 14:19 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-03-22 2:09 [PATCH] Use mtocrf in asm when CONFIG_POWER4_ONLY=y Olof Johansson
2007-03-22 2:10 ` Stephen Rothwell
2007-03-22 11:24 ` Segher Boessenkool
2007-03-22 11:50 ` Paul Mackerras
2007-03-22 14:34 ` [PATCH] [v2] powerpc: " Olof Johansson
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as the URLs for the NNTP newsgroup(s).