linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ARM: lib: use LDRD/STRD for data copy
@ 2012-03-19  7:02 Boojin Kim
  2012-03-19  8:55 ` Russell King - ARM Linux
                   ` (3 more replies)
  0 siblings, 4 replies; 25+ messages in thread
From: Boojin Kim @ 2012-03-19  7:02 UTC (permalink / raw)
  To: linux-arm-kernel

This patch uses LDRD/STRD, which load and store data in doubleword units,
for the 8-word copy loop.
It brings better performance than the LDM/STM instructions that were used originally.

Signed-off-by: Boojin Kim <boojin.kim@samsung.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/lib/copy_from_user.S |   14 +++++++++-----
 arch/arm/lib/copy_template.S  |   10 ++++++----
 arch/arm/lib/copy_to_user.S   |   13 +++++++++----
 arch/arm/lib/memcpy.S         |   13 +++++++++----
 4 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 66a477a..15d1e1c 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -51,11 +51,6 @@
 	ldr1w \ptr, \reg4, \abort
 	.endm

-	.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
-	ldr4w \ptr, \reg1, \reg2, \reg3, \reg4, \abort
-	ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort
-	.endm
-
 	.macro ldr1b ptr reg cond=al abort
 	ldrusr	\reg, \ptr, 1, \cond, abort=\abort
 	.endm
@@ -68,6 +63,15 @@
 	stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
 	.endm

+	.macro cpy8w dst src reg1 reg2 abort
+	.irp offset, #0, #8, #16, #24
+	ldr1w \src, \reg1, \abort
+	ldr1w \src, \reg2, \abort
+	strd \reg1, \reg2, [\dst, \offset]
+	.endr
+	add \dst, \dst, #32
+	.endm
+
 	.macro str1b ptr reg cond=al abort
 	str\cond\()b \reg, [\ptr], #1
 	.endm
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8..72640aa 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -28,9 +28,8 @@
  *	'ptr' to the next word. The 'abort' argument is used for fixup tables.
  *
  * ldr4w ptr reg1 reg2 reg3 reg4 abort
- * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
  *
- *	This loads four or eight words starting from 'ptr', stores them
+ *	This loads four words starting from 'ptr', stores them
  *	in provided registers and increments 'ptr' past those words.
  *	The'abort' argument is used for fixup tables.
  *
@@ -47,6 +46,10 @@
  *	Same as their ldr* counterparts, but data is stored to 'ptr' location
  *	rather than being loaded.
  *
+ * cpy8w dst src reg1 reg2 abort
+ *	This copies eight words from 'src' to 'dst' through reg1/reg2 and
+ *	advances both pointers. The 'abort' argument is used for fixup tables.
+ *
  * enter reg1 reg2
  *
  *	Preserve the provided registers on the stack plus any additional
@@ -97,9 +100,8 @@
 	PLD(	pld	[r1, #92]		)

 3:	PLD(	pld	[r1, #124]		)
-4:		ldr8w	r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+4:		cpy8w   r0, r1, r4, r5, abort=20f
 		subs	r2, r2, #32
-		str8w	r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
 		bge	3b
 	PLD(	cmn	r2, #96			)
 	PLD(	bge	4b			)
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index d066df6..9402a08 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -48,10 +48,6 @@
 	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
 	.endm

-	.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
-	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
-	.endm
-
 	.macro ldr1b ptr reg cond=al abort
 	ldr\cond\()b \reg, [\ptr], #1
 	.endm
@@ -71,6 +67,15 @@
 	str1w \ptr, \reg8, \abort
 	.endm

+	.macro cpy8w dst src reg1 reg2 abort
+	.irp offset, #0, #8, #16, #24
+	ldrd \reg1, \reg2, [\src, \offset]
+	str1w \dst, \reg1, \abort
+	str1w \dst, \reg2, \abort
+	.endr
+	add \src, \src, #32
+	.endm
+
 	.macro str1b ptr reg cond=al abort
 	strusr	\reg, \ptr, 1, \cond, abort=\abort
 	.endm
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index a9b9e22..25320c9 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -24,10 +24,6 @@
 	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
 	.endm

-	.macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
-	ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
-	.endm
-
 	.macro ldr1b ptr reg cond=al abort
 	ldr\cond\()b \reg, [\ptr], #1
 	.endm
@@ -40,6 +36,15 @@
 	stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
 	.endm

+	.macro cpy8w dst src reg1 reg2 abort
+	.irp offset, #0, #8, #16, #24
+	ldrd \reg1, \reg2, [\src, \offset]
+	strd \reg1, \reg2, [\dst, \offset]
+	.endr
+	add \src, \src, #32
+	add \dst, \dst, #32
+	.endm
+
 	.macro str1b ptr reg cond=al abort
 	str\cond\()b \reg, [\ptr], #1
 	.endm
--
1.7.1

^ permalink raw reply related	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2012-03-29  4:00 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-03-19  7:02 [PATCH] ARM: lib: use LDRD/STRD for data copy Boojin Kim
2012-03-19  8:55 ` Russell King - ARM Linux
2012-03-19 14:36   ` Rob Herring
2012-03-19 15:41     ` Russell King - ARM Linux
2012-03-19 16:34       ` Måns Rullgård
2012-03-19 16:36       ` Rob Herring
2012-03-19 16:53         ` Nicolas Pitre
2012-03-19 17:11         ` Måns Rullgård
2012-03-19 20:11         ` Michael Hope
2012-03-20  0:21     ` Boojin Kim
2012-03-19 14:10 ` Nicolas Pitre
2012-03-20  0:05   ` Boojin Kim
2012-03-27  0:26 ` [PATCH 1/2] ARM: lib: Add optimized memcpy with 64 byte pld size Boojin Kim
2012-03-27  2:35   ` Nicolas Pitre
2012-03-28  0:28     ` Boojin Kim
2012-03-28  5:23       ` Nicolas Pitre
2012-03-29  4:00         ` [PATCH 0/4] memcpy optimized with strd/ldrd Nicolas Pitre
2012-03-29  4:00           ` [PATCH 1/4] ARM: copy_template.S: move some registers around Nicolas Pitre
2012-03-29  4:00           ` [PATCH 2/4] ARM: copy_template.S: rework the unaligned copy loop Nicolas Pitre
2012-03-29  4:00           ` [PATCH 3/4] ARM: copy_template.S: enforce contigous register set with memory accessors Nicolas Pitre
2012-03-29  4:00           ` [PATCH 4/4] ARM: option to select LDRD/STRD optimized memory copy Nicolas Pitre
2012-03-27  0:27 ` [PATCH 2/2] ARM: lib: use LDRD/STRD for data copy Boojin Kim
2012-03-27  7:40   ` Russell King - ARM Linux
2012-03-28  0:19     ` Boojin Kim
2012-03-28  4:10       ` Boojin Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).