public inbox for u-boot@lists.denx.de
 help / color / mirror / Atom feed
* [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode
@ 2014-11-21 15:34 Stefan Agner
  2014-11-27 23:02 ` Stefan Agner
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Stefan Agner @ 2014-11-21 15:34 UTC (permalink / raw)
  To: u-boot

Resynchronize memcpy/memset with kernel and build them explicitly
in Thumb2 mode (unified syntax). Those assembler files can be
built and linked in ARM mode too, however when calling them from
Thumb2 built code, the stack got corrupted and the copy did not
succeed (the exact details have not been traced back). Hoever,
the Linux kernel builds those files in Thumb2 mode. Hence U-Boot
should build them in Thumb2 mode too when CONFIG_SYS_THUMB_BUILD
is set.

To build the files without warning, some assembler instructions
had to be replaced with their UAL compliant variant (thanks
Jeroen for this input).

To build the file in Thumb2 mode the implicit-it=always option need
to be set to generate Thumb2 compliant IT instructions where needed.
We add this option to the general AFLAGS when building for Thumb2.

Signed-off-by: Stefan Agner <stefan@agner.ch>
---
Changes since v1:
- Don't set auto-it in AFLAGS
- Removed "no-warn-deprecated" warning suppression
- Converted non-UAL assembler instructions to their UAL compliant variants

Tested in ARM and Thumb2 mode on Vybrid SoC. Disassembled the memset/
memcpy object files before and after converting the instructions to UAL,
the output was identical.

 arch/arm/config.mk               |   4 +-
 arch/arm/include/asm/assembler.h |  33 ++++++++++--
 arch/arm/lib/memcpy.S            |  80 +++++++++++++++++++---------
 arch/arm/lib/memset.S            | 112 ++++++++++++++++++++-------------------
 4 files changed, 142 insertions(+), 87 deletions(-)

diff --git a/arch/arm/config.mk b/arch/arm/config.mk
index f0eafd6..14f4e9f 100644
--- a/arch/arm/config.mk
+++ b/arch/arm/config.mk
@@ -26,7 +26,9 @@ PLATFORM_CPPFLAGS += -D__ARM__
 
 # Choose between ARM/Thumb instruction sets
 ifeq ($(CONFIG_SYS_THUMB_BUILD),y)
-PF_CPPFLAGS_ARM := $(call cc-option, -mthumb -mthumb-interwork,\
+AFLAGS_IMPLICIT_IT	:= $(call as-option,-Wa$(comma)-mimplicit-it=always)
+PF_CPPFLAGS_ARM		:= $(AFLAGS_IMPLICIT_IT) $(AFLAGS_NOWARN) \
+			$(call cc-option, -mthumb -mthumb-interwork,\
 			$(call cc-option,-marm,)\
 			$(call cc-option,-mno-thumb-interwork,)\
 		)
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 5e4789b..11b80fb 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -14,12 +14,14 @@
  *  assembler source.
  */
 
+#include <config.h>
+
 /*
  * Endian independent macros for shifting bytes within registers.
  */
 #ifndef __ARMEB__
-#define pull		lsr
-#define push		lsl
+#define lspull		lsr
+#define lspush		lsl
 #define get_byte_0	lsl #0
 #define get_byte_1	lsr #8
 #define get_byte_2	lsr #16
@@ -29,8 +31,8 @@
 #define put_byte_2	lsl #16
 #define put_byte_3	lsl #24
 #else
-#define pull		lsl
-#define push		lsr
+#define lspull		lsl
+#define lspush		lsr
 #define get_byte_0	lsr #24
 #define get_byte_1	lsr #16
 #define get_byte_2	lsr #8
@@ -54,7 +56,28 @@
 #define PLD(code...)
 #endif
 
+	.irp	c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+	.macro	ret\c, reg
+#if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__)
+	mov\c	pc, \reg
+#else
+	.ifeqs	"\reg", "lr"
+	bx\c	\reg
+	.else
+	mov\c	pc, \reg
+	.endif
+#endif
+	.endm
+	.endr
+
 /*
- * Cache alligned
+ * Cache aligned, used for optimized memcpy/memset
+ * In the kernel this is only enabled for Feroceon CPU's...
+ * We disable it especially for Thumb builds since those instructions
+ * are not made in a Thumb ready way...
  */
+#ifdef CONFIG_SYS_THUMB_BUILD
+#define CALGN(code...)
+#else
 #define CALGN(code...) code
+#endif
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index f655256..eeaf003 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -10,9 +10,14 @@
  *  published by the Free Software Foundation.
  */
 
+#include <linux/linkage.h>
 #include <asm/assembler.h>
 
+#ifdef CONFIG_SYS_THUMB_BUILD
+#define W(instr)	instr.w
+#else
 #define W(instr)	instr
+#endif
 
 #define LDR1W_SHIFT	0
 #define STR1W_SHIFT	0
@@ -30,7 +35,7 @@
 	.endm
 
 	.macro ldr1b ptr reg cond=al abort
-	ldr\cond\()b \reg, [\ptr], #1
+	ldrb\cond\() \reg, [\ptr], #1
 	.endm
 
 	.macro str1w ptr reg abort
@@ -42,7 +47,7 @@
 	.endm
 
 	.macro str1b ptr reg cond=al abort
-	str\cond\()b \reg, [\ptr], #1
+	strb\cond\() \reg, [\ptr], #1
 	.endm
 
 	.macro enter reg1 reg2
@@ -56,10 +61,12 @@
 	.text
 
 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
-
-.globl memcpy
-memcpy:
-
+	.syntax unified
+#ifdef CONFIG_SYS_THUMB_BUILD
+	.thumb
+	.thumb_func
+#endif
+ENTRY(memcpy)
 		cmp	r0, r1
 		moveq	pc, lr
 
@@ -79,7 +86,7 @@ memcpy:
 
 	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	r3, ip, #32		)
-	CALGN(	sbcnes	r4, r3, r2		)  @ C is always set here
+	CALGN(	sbcsne	r4, r3, r2		)  @ C is always set here
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, r3		)  @ C gets set
@@ -178,7 +185,7 @@ memcpy:
 
 	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	ip, ip, #32		)
-	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
+	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
 	CALGN(	bcc	15f			)
 
@@ -193,24 +200,24 @@ memcpy:
 
 12:	PLD(	pld	[r1, #124]		)
 13:		ldr4w	r1, r4, r5, r6, r7, abort=19f
-		mov	r3, lr, pull #\pull
+		mov	r3, lr, lspull #\pull
 		subs	r2, r2, #32
 		ldr4w	r1, r8, r9, ip, lr, abort=19f
-		orr	r3, r3, r4, push #\push
-		mov	r4, r4, pull #\pull
-		orr	r4, r4, r5, push #\push
-		mov	r5, r5, pull #\pull
-		orr	r5, r5, r6, push #\push
-		mov	r6, r6, pull #\pull
-		orr	r6, r6, r7, push #\push
-		mov	r7, r7, pull #\pull
-		orr	r7, r7, r8, push #\push
-		mov	r8, r8, pull #\pull
-		orr	r8, r8, r9, push #\push
-		mov	r9, r9, pull #\pull
-		orr	r9, r9, ip, push #\push
-		mov	ip, ip, pull #\pull
-		orr	ip, ip, lr, push #\push
+		orr	r3, r3, r4, lspush #\push
+		mov	r4, r4, lspull #\pull
+		orr	r4, r4, r5, lspush #\push
+		mov	r5, r5, lspull #\pull
+		orr	r5, r5, r6, lspush #\push
+		mov	r6, r6, lspull #\pull
+		orr	r6, r6, r7, lspush #\push
+		mov	r7, r7, lspull #\pull
+		orr	r7, r7, r8, lspush #\push
+		mov	r8, r8, lspull #\pull
+		orr	r8, r8, r9, lspush #\push
+		mov	r9, r9, lspull #\pull
+		orr	r9, r9, ip, lspush #\push
+		mov	ip, ip, lspull #\pull
+		orr	ip, ip, lr, lspush #\push
 		str8w	r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
 		bge	12b
 	PLD(	cmn	r2, #96			)
@@ -221,10 +228,10 @@ memcpy:
 14:		ands	ip, r2, #28
 		beq	16f
 
-15:		mov	r3, lr, pull #\pull
+15:		mov	r3, lr, lspull #\pull
 		ldr1w	r1, lr, abort=21f
 		subs	ip, ip, #4
-		orr	r3, r3, lr, push #\push
+		orr	r3, r3, lr, lspush #\push
 		str1w	r0, r3, abort=21f
 		bgt	15b
 	CALGN(	cmp	r2, #0			)
@@ -241,3 +248,24 @@ memcpy:
 17:		forward_copy_shift	pull=16	push=16
 
 18:		forward_copy_shift	pull=24	push=8
+
+
+/*
+ * Abort preamble and completion macros.
+ * If a fixup handler is required then those macros must surround it.
+ * It is assumed that the fixup code will handle the private part of
+ * the exit macro.
+ */
+
+	.macro	copy_abort_preamble
+19:	ldmfd	sp!, {r5 - r9}
+	b	21f
+20:	ldmfd	sp!, {r5 - r8}
+21:
+	.endm
+
+	.macro	copy_abort_end
+	ldmfd	sp!, {r4, pc}
+	.endm
+
+ENDPROC(memcpy)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 0cdf895..7208f20 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -9,32 +9,25 @@
  *
  *  ASM optimised string functions
  */
+#include <linux/linkage.h>
 #include <asm/assembler.h>
 
 	.text
 	.align	5
-	.word	0
 
-1:	subs	r2, r2, #4		@ 1 do we have enough
-	blt	5f			@ 1 bytes to align with?
-	cmp	r3, #2			@ 1
-	strltb	r1, [r0], #1		@ 1
-	strleb	r1, [r0], #1		@ 1
-	strb	r1, [r0], #1		@ 1
-	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
-/*
- * The pointer is now aligned and the length is adjusted.  Try doing the
- * memset again.
- */
-
-.globl memset
-memset:
+	.syntax unified
+#ifdef CONFIG_SYS_THUMB_BUILD
+	.thumb
+	.thumb_func
+#endif
+ENTRY(memset)
 	ands	r3, r0, #3		@ 1 unaligned?
-	bne	1b			@ 1
+	mov	ip, r0			@ preserve r0 as return value
+	bne	6f			@ 1
 /*
- * we know that the pointer in r0 is aligned to a word boundary.
+ * we know that the pointer in ip is aligned to a word boundary.
  */
-	orr	r1, r1, r1, lsl #8
+1:	orr	r1, r1, r1, lsl #8
 	orr	r1, r1, r1, lsl #16
 	mov	r3, r1
 	cmp	r2, #16
@@ -43,29 +36,28 @@ memset:
 #if ! CALGN(1)+0
 
 /*
- * We need an extra register for this loop - save the return address and
- * use the LR
+ * We need 2 extra registers for this loop - use r8 and the LR
  */
-	str	lr, [sp, #-4]!
-	mov	ip, r1
+	stmfd	sp!, {r8, lr}
+	mov	r8, r1
 	mov	lr, r1
 
 2:	subs	r2, r2, #64
-	stmgeia	r0!, {r1, r3, ip, lr}	@ 64 bytes at a time.
-	stmgeia	r0!, {r1, r3, ip, lr}
-	stmgeia	r0!, {r1, r3, ip, lr}
-	stmgeia	r0!, {r1, r3, ip, lr}
+	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
+	stmiage	ip!, {r1, r3, r8, lr}
+	stmiage	ip!, {r1, r3, r8, lr}
+	stmiage	ip!, {r1, r3, r8, lr}
 	bgt	2b
-	ldmeqfd	sp!, {pc}		@ Now <64 bytes to go.
+	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
 /*
  * No need to correct the count; we're only testing bits from now on
  */
 	tst	r2, #32
-	stmneia	r0!, {r1, r3, ip, lr}
-	stmneia	r0!, {r1, r3, ip, lr}
+	stmiane	ip!, {r1, r3, r8, lr}
+	stmiane	ip!, {r1, r3, r8, lr}
 	tst	r2, #16
-	stmneia	r0!, {r1, r3, ip, lr}
-	ldr	lr, [sp], #4
+	stmiane	ip!, {r1, r3, r8, lr}
+	ldmfd	sp!, {r8, lr}
 
 #else
 
@@ -74,53 +66,63 @@ memset:
  * whole cache lines@once.
  */
 
-	stmfd	sp!, {r4-r7, lr}
+	stmfd	sp!, {r4-r8, lr}
 	mov	r4, r1
 	mov	r5, r1
 	mov	r6, r1
 	mov	r7, r1
-	mov	ip, r1
+	mov	r8, r1
 	mov	lr, r1
 
 	cmp	r2, #96
-	tstgt	r0, #31
+	tstgt	ip, #31
 	ble	3f
 
-	and	ip, r0, #31
-	rsb	ip, ip, #32
-	sub	r2, r2, ip
-	movs	ip, ip, lsl #(32 - 4)
-	stmcsia	r0!, {r4, r5, r6, r7}
-	stmmiia	r0!, {r4, r5}
-	tst	ip, #(1 << 30)
-	mov	ip, r1
-	strne	r1, [r0], #4
+	and	r8, ip, #31
+	rsb	r8, r8, #32
+	sub	r2, r2, r8
+	movs	r8, r8, lsl #(32 - 4)
+	stmiacs	ip!, {r4, r5, r6, r7}
+	stmiami	ip!, {r4, r5}
+	tst	r8, #(1 << 30)
+	mov	r8, r1
+	strne	r1, [ip], #4
 
 3:	subs	r2, r2, #64
-	stmgeia	r0!, {r1, r3-r7, ip, lr}
-	stmgeia	r0!, {r1, r3-r7, ip, lr}
+	stmiage	ip!, {r1, r3-r8, lr}
+	stmiage	ip!, {r1, r3-r8, lr}
 	bgt	3b
-	ldmeqfd	sp!, {r4-r7, pc}
+	ldmfdeq	sp!, {r4-r8, pc}
 
 	tst	r2, #32
-	stmneia	r0!, {r1, r3-r7, ip, lr}
+	stmiane	ip!, {r1, r3-r8, lr}
 	tst	r2, #16
-	stmneia	r0!, {r4-r7}
-	ldmfd	sp!, {r4-r7, lr}
+	stmiane	ip!, {r4-r7}
+	ldmfd	sp!, {r4-r8, lr}
 
 #endif
 
 4:	tst	r2, #8
-	stmneia	r0!, {r1, r3}
+	stmiane	ip!, {r1, r3}
 	tst	r2, #4
-	strne	r1, [r0], #4
+	strne	r1, [ip], #4
 /*
  * When we get here, we've got less than 4 bytes to zero.  We
  * may have an unaligned pointer as well.
  */
 5:	tst	r2, #2
-	strneb	r1, [r0], #1
-	strneb	r1, [r0], #1
+	strbne	r1, [ip], #1
+	strbne	r1, [ip], #1
 	tst	r2, #1
-	strneb	r1, [r0], #1
-	mov	pc, lr
+	strbne	r1, [ip], #1
+	ret	lr
+
+6:	subs	r2, r2, #4		@ 1 do we have enough
+	blt	5b			@ 1 bytes to align with?
+	cmp	r3, #2			@ 1
+	strblt	r1, [ip], #1		@ 1
+	strble	r1, [ip], #1		@ 1
+	strb	r1, [ip], #1		@ 1
+	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
+	b	1b
+ENDPROC(memset)
-- 
2.1.3

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode
  2014-11-21 15:34 [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode Stefan Agner
@ 2014-11-27 23:02 ` Stefan Agner
  2014-11-30 19:33 ` Simon Glass
  2014-12-03 15:44 ` Bill Pringlemeir
  2 siblings, 0 replies; 9+ messages in thread
From: Stefan Agner @ 2014-11-27 23:02 UTC (permalink / raw)
  To: u-boot


On 2014-11-21 16:34, Stefan Agner wrote:
> Resynchronize memcpy/memset with kernel and build them explicitly
> in Thumb2 mode (unified syntax). Those assembler files can be
> built and linked in ARM mode too, however when calling them from
> Thumb2 built code, the stack got corrupted and the copy did not
> succeed (the exact details have not been traced back). Hoever,
> the Linux kernel builds those files in Thumb2 mode. Hence U-Boot
> should build them in Thumb2 mode too when CONFIG_SYS_THUMB_BUILD
> is set.
> 
> To build the files without warning, some assembler instructions
> had to be replaced with their UAL compliant variant (thanks
> Jeroen for this input).
> 
> To build the file in Thumb2 mode the implicit-it=always option need
> to be set to generate Thumb2 compliant IT instructions where needed.
> We add this option to the general AFLAGS when building for Thumb2.
> 
> Signed-off-by: Stefan Agner <stefan@agner.ch>
> ---
> Changes since v1:
> - Don't set auto-it in AFLAGS
> - Removed "no-warn-deprecated" warning suppression
> - Converted non-UAL assembler instructions to their UAL compliant variants
> 
> Tested in ARM and Thumb2 mode on Vybrid SoC. Disassembled the memset/
> memcpy object files before and after converting the instructions to UAL,
> the output was identical.
> 

Any thoughts on that? Would be nice to have it in 2015.01 as the current
state breaks Thumb2 for our board.

--
Stefan

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode
  2014-11-21 15:34 [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode Stefan Agner
  2014-11-27 23:02 ` Stefan Agner
@ 2014-11-30 19:33 ` Simon Glass
  2014-11-30 20:07   ` Stefan Agner
  2014-12-03 15:44 ` Bill Pringlemeir
  2 siblings, 1 reply; 9+ messages in thread
From: Simon Glass @ 2014-11-30 19:33 UTC (permalink / raw)
  To: u-boot

Hi Stefan,

On 21 November 2014 at 08:34, Stefan Agner <stefan@agner.ch> wrote:
> Resynchronize memcpy/memset with kernel and build them explicitly
> in Thumb2 mode (unified syntax). Those assembler files can be
> built and linked in ARM mode too, however when calling them from
> Thumb2 built code, the stack got corrupted and the copy did not
> succeed (the exact details have not been traced back). Hoever,
> the Linux kernel builds those files in Thumb2 mode. Hence U-Boot
> should build them in Thumb2 mode too when CONFIG_SYS_THUMB_BUILD
> is set.
>
> To build the files without warning, some assembler instructions
> had to be replaced with their UAL compliant variant (thanks
> Jeroen for this input).
>
> To build the file in Thumb2 mode the implicit-it=always option need
> to be set to generate Thumb2 compliant IT instructions where needed.
> We add this option to the general AFLAGS when building for Thumb2.
>
> Signed-off-by: Stefan Agner <stefan@agner.ch>
> ---
> Changes since v1:
> - Don't set auto-it in AFLAGS
> - Removed "no-warn-deprecated" warning suppression
> - Converted non-UAL assembler instructions to their UAL compliant variants

Reviewed-by: Simon Glass <sjg@chromium.org>

Tested on pit and found that it fixed the problem with Thumb mode.
Still works fine in ARM mode.

Tested-by: Simon Glass <sjg@chromium.org>

A few nits:
- typo 'Hoever' in commit message
- you could mention the Linux commit/release you synced to

>
> Tested in ARM and Thumb2 mode on Vybrid SoC. Disassembled the memset/
> memcpy object files before and after converting the instructions to UAL,
> the output was identical.
>
>  arch/arm/config.mk               |   4 +-
>  arch/arm/include/asm/assembler.h |  33 ++++++++++--
>  arch/arm/lib/memcpy.S            |  80 +++++++++++++++++++---------
>  arch/arm/lib/memset.S            | 112 ++++++++++++++++++++-------------------
>  4 files changed, 142 insertions(+), 87 deletions(-)
>

Regards,
Simon

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode
  2014-11-30 19:33 ` Simon Glass
@ 2014-11-30 20:07   ` Stefan Agner
  0 siblings, 0 replies; 9+ messages in thread
From: Stefan Agner @ 2014-11-30 20:07 UTC (permalink / raw)
  To: u-boot

On 2014-11-30 20:33, Simon Glass wrote:
> Hi Stefan,
> 
> On 21 November 2014 at 08:34, Stefan Agner <stefan@agner.ch> wrote:
>> Resynchronize memcpy/memset with kernel and build them explicitly
>> in Thumb2 mode (unified syntax). Those assembler files can be
>> built and linked in ARM mode too, however when calling them from
>> Thumb2 built code, the stack got corrupted and the copy did not
>> succeed (the exact details have not been traced back). Hoever,
>> the Linux kernel builds those files in Thumb2 mode. Hence U-Boot
>> should build them in Thumb2 mode too when CONFIG_SYS_THUMB_BUILD
>> is set.
>>
>> To build the files without warning, some assembler instructions
>> had to be replaced with their UAL compliant variant (thanks
>> Jeroen for this input).
>>
>> To build the file in Thumb2 mode the implicit-it=always option need
>> to be set to generate Thumb2 compliant IT instructions where needed.
>> We add this option to the general AFLAGS when building for Thumb2.
>>
>> Signed-off-by: Stefan Agner <stefan@agner.ch>
>> ---
>> Changes since v1:
>> - Don't set auto-it in AFLAGS
>> - Removed "no-warn-deprecated" warning suppression
>> - Converted non-UAL assembler instructions to their UAL compliant variants
> 
> Reviewed-by: Simon Glass <sjg@chromium.org>
> 
> Tested on pit and found that it fixed the problem with Thumb mode.
> Still works fine in ARM mode.
> 
> Tested-by: Simon Glass <sjg@chromium.org>
> 
> A few nits:
> - typo 'Hoever' in commit message
> - you could mention the Linux commit/release you synced to
> 

Thanks for testing Simon.

The Linux commit/release I synced to was some 3.18-rcX. However, the
relevant files did not change since 3.17, so we can add that as the
synced version.

--
Stefan

>>
>> Tested in ARM and Thumb2 mode on Vybrid SoC. Disassembled the memset/
>> memcpy object files before and after converting the instructions to UAL,
>> the output was identical.
>>
>>  arch/arm/config.mk               |   4 +-
>>  arch/arm/include/asm/assembler.h |  33 ++++++++++--
>>  arch/arm/lib/memcpy.S            |  80 +++++++++++++++++++---------
>>  arch/arm/lib/memset.S            | 112 ++++++++++++++++++++-------------------
>>  4 files changed, 142 insertions(+), 87 deletions(-)
>>
> 
> Regards,
> Simon

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode
  2014-11-21 15:34 [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode Stefan Agner
  2014-11-27 23:02 ` Stefan Agner
  2014-11-30 19:33 ` Simon Glass
@ 2014-12-03 15:44 ` Bill Pringlemeir
  2014-12-03 17:04   ` Stefan Agner
  2 siblings, 1 reply; 9+ messages in thread
From: Bill Pringlemeir @ 2014-12-03 15:44 UTC (permalink / raw)
  To: u-boot


I tested these changes on several compilers and binutils.  4.3.3, 4.6.3,
4.7.2, 4.8.2 and 4.9.2 gcc with binutils 2.19, 2.21-24 in both THUMB and
ARM on the Vybrid.  It seem we must have an image <= 224kB which is the
IRAM bank0 size - 0x8000 of u-boot base; u-boot since v2014.04 will not
boot for me as the QSPI seemed to take it over this boundary.

The attached patches are meant to be attached as they aren't meant to be
taken by patchworks, etc but just to show the small changes I made to
minimize the image to less than 224kB.

[build script]
# File sizes similar because of 'mkimage' padding+hdr for 'imx'.
# 2nd size is for the 'thumb2' builds.
set -e

## (Sourcery CodeBench Lite 2012.03-56) 4.6.3
## (Sourcery CodeBench Lite 2012.03-56) 2.21.53.20110905
# 227092 Dec  1 15:20 u-boot.imx
# 173844 Dec  1 15:36 u-boot.imx
export PATH=$PATH:$HOME/x-tools/arm-2012.03/bin

## 4.7.4 20130913 (release) [ARM/embedded-4_7-branch revision 202601]
## GNU assembler (GNU Binutils) 2.23.2
# 222996 Dec  1 15:19 u-boot.imx
# 173844 Dec  1 15:35 u-boot.imx
#export PATH=$PATH:$HOME/x-tools/arm-2013.q3/bin

## (crosstool-NG hg+default-9838aecd6340) 4.8.2
## (GNU Binutils) 2.23.2
# 222996 Dec  1 15:18 u-boot.imx
# 173844 Dec  1 15:34 u-boot.imx
#export PATH=$PATH:$HOME/x-tools/arm-none-eabi/bin

## (Sourcery G++ Lite 2009q1-161) 4.3.3
## (Sourcery G++ Lite 2009q1-161) 2.19.51.20090205
# 218900 Dec  1 15:17 u-boot.imx - no CONFIG_SYS_LONGHELP
# GCC does not produce working THUMB binaries.
#export PATH=$PATH:/opt/arm-2009q1/bin
NAME=arm-none-eabi-

## (crosstool-NG 1.20.0) 4.9.2
## (crosstool-NG 1.20.0) 2.24
# 222996 Dec  1 15:21 u-boot.imx
# 173844 Dec  1 15:32 u-boot.imx
#export PATH=$PATH:$HOME/x-tools/arm-none-linux-gnueabi-vybrid-4.9.2/bin/

## (crosstool-NG hg+default-2a773bd38e71) 4.7.3
## (crosstool-NG hg+default-2a773bd38e71) 2.22
# 222996 Dec  1 15:22 u-boot.imx
# 173844 Dec  1 15:31 u-boot.imx
#export PATH=$PATH:$HOME/x-tools/arm-none-linux-gnueabi-4.7.2/bin

## (crosstool-NG hg+-c7fa97debb36) 4.6.3
## (crosstool-NG hg+-c7fa97debb36) 2.21.1
# 227092 Dec  1 15:24 u-boot.imx
# 173844 Dec  1 15:30 u-boot.imx
#export PATH=$PATH:$HOME/x-tools/arm-none-linux-gnueabi.gcc4.6.3/bin
#NAME=arm-none-linux-gnueabi-

## (Timesys 20130708) 4.7.3
## (Timesys 20130708) 2.23.2
# 227092 Dec  1 15:25 u-boot.imx
# 173844 Dec  1 15:29 u-boot.imx
#export PATH=$PATH:$HOME/x-tools/armv7l-timesys-linux-gnueabi/bin
#NAME=armv7l-timesys-linux-gnueabi-

## (crosstool-NG linaro-1.13.1-4.8-2013.10 - Linaro GCC 2013.10) 4.8.2 20131014
## (crosstool-NG linaro-1.13.1-4.8-2013.10 - Linaro GCC 2013.10) 2.23.2.20130610 Linaro 2013.10-4
# 222996 Dec  1 15:26 u-boot.imx
# 173844 Dec  1 15:28 u-boot.imx
#export PATH=$PATH:$HOME/x-tools/gcc-linaro-arm-linux-gnueabihf-4.8-2013.10_linux/bin
#NAME=arm-linux-gnueabihf-

make CROSS_COMPILE=$NAME vf610twr_config
make CROSS_COMPILE=$NAME $* u-boot.imx
cp u-boot.imx /srv/tftp

[end script]

I applied Stefan's memcpy/thumb patch, reverted c6150aaf and applied
'fs/ext4/ext4fs.c, fs/fs.c fs/fat/fat_write.c: Adjust 64bit math
methods' by Tom Rini <trini@ti.com> (it has a little whitespace fat in
it).  All of the images booted on a Vybrid Tower (Rev H) from an SD
card.  Only issue is that I think the 'AFLAGS_NOWARN' can be removed as
per below...

On 21 Nov 2014, stefan at agner.ch wrote:

> Resynchronize memcpy/memset with kernel and build them explicitly
> in Thumb2 mode (unified syntax). Those assembler files can be
> built and linked in ARM mode too, however when calling them from
> Thumb2 built code, the stack got corrupted and the copy did not
> succeed (the exact details have not been traced back). Hoever,
> the Linux kernel builds those files in Thumb2 mode. Hence U-Boot
> should build them in Thumb2 mode too when CONFIG_SYS_THUMB_BUILD
> is set.

> To build the files without warning, some assembler instructions
> had to be replaced with their UAL compliant variant (thanks
> Jeroen for this input).

> To build the file in Thumb2 mode the implicit-it=always option need
> to be set to generate Thumb2 compliant IT instructions where needed.
> We add this option to the general AFLAGS when building for Thumb2.

> Signed-off-by: Stefan Agner <stefan@agner.ch>
> ---
> Changes since v1: - Don't set auto-it in AFLAGS - Removed
> "no-warn-deprecated" warning suppression - Converted non-UAL assembler
> instructions to their UAL compliant variants

> Tested in ARM and Thumb2 mode on Vybrid SoC. Disassembled the memset/
> memcpy object files before and after converting the instructions to
> UAL, the output was identical.
>
> arch/arm/config.mk | 4 +- arch/arm/include/asm/assembler.h | 33
> ++++++++++-- arch/arm/lib/memcpy.S | 80 +++++++++++++++++++---------
> arch/arm/lib/memset.S | 112 ++++++++++++++++++++------------------- 4
> files changed, 142 insertions(+), 87 deletions(-)
>
> diff --git a/arch/arm/config.mk b/arch/arm/config.mk
> index f0eafd6..14f4e9f 100644
> --- a/arch/arm/config.mk
> +++ b/arch/arm/config.mk
> @@ -26,7 +26,9 @@ PLATFORM_CPPFLAGS += -D__ARM__
>
> # Choose between ARM/Thumb instruction sets ifeq
> ($(CONFIG_SYS_THUMB_BUILD),y) -PF_CPPFLAGS_ARM := $(call cc-option,
> -mthumb -mthumb-interwork,\ +AFLAGS_IMPLICIT_IT := $(call
> as-option,-Wa$(comma)-mimplicit-it=always) +PF_CPPFLAGS_ARM :=
> $(AFLAGS_IMPLICIT_IT) $(AFLAGS_NOWARN) \ + $(call cc-option, -mthumb

I don't think we need the '$(AFLAGS_NOWARN)' as you fixed that?  I
removed it during my tests and didn't see anything.  Especially all
builds produced output.  I guess this is copy/paste from a Linux
Makefile as I don't even see where it is set in U-Boot.

> -mthumb-interwork,\ $(call cc-option,-marm,)\ $(call
> cc-option,-mno-thumb-interwork,)\ ) diff --git
> a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
> index 5e4789b..11b80fb 100644 --- a/arch/arm/include/asm/assembler.h
> +++ b/arch/arm/include/asm/assembler.h @@ -14,12 +14,14 @@ * assembler
> source.  */

[snip]

Tested-by: Bill Pringlemeir <bpringlemeir@nbsps.com>

-------------- next part --------------
A non-text attachment was scrubbed...
Name: thumb-mode.patch
Type: text/x-diff
Size: 1155 bytes
Desc: patches to 2015.01-rc2
URL: <http://lists.denx.de/pipermail/u-boot/attachments/20141203/fdc2d39f/attachment.patch>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode
  2014-12-03 15:44 ` Bill Pringlemeir
@ 2014-12-03 17:04   ` Stefan Agner
  2014-12-15 14:24     ` Andreas Färber
  0 siblings, 1 reply; 9+ messages in thread
From: Stefan Agner @ 2014-12-03 17:04 UTC (permalink / raw)
  To: u-boot

On 2014-12-03 16:44, Bill Pringlemeir wrote:
> I tested these changes on several compilers and binutils.  4.3.3, 4.6.3,
> 4.7.2, 4.8.2 and 4.9.2 gcc with binutils 2.19, 2.21-24 in both THUMB and
> ARM on the Vybrid.  It seem we must have an image <= 224kB which is the
> IRAM bank0 size - 0x8000 of u-boot base; u-boot since v2014.04 will not
> boot for me as the QSPI seemed to take it over this boundary.
> 
> The attached patches are meant to be attached as they aren't meant to be
> taken by patchworks, etc but just to show the small changes I made to
> minimize the image to less than 224kB.
> 
> [build script]
> # File sizes similar because of 'mkimage' padding+hdr for 'imx'.
> # 2nd size is for the 'thumb2' builds.
> set -e
> 
> ## (Sourcery CodeBench Lite 2012.03-56) 4.6.3
> ## (Sourcery CodeBench Lite 2012.03-56) 2.21.53.20110905
> # 227092 Dec  1 15:20 u-boot.imx
> # 173844 Dec  1 15:36 u-boot.imx
> export PATH=$PATH:$HOME/x-tools/arm-2012.03/bin
> 
> ## 4.7.4 20130913 (release) [ARM/embedded-4_7-branch revision 202601]
> ## GNU assembler (GNU Binutils) 2.23.2
> # 222996 Dec  1 15:19 u-boot.imx
> # 173844 Dec  1 15:35 u-boot.imx
> #export PATH=$PATH:$HOME/x-tools/arm-2013.q3/bin
> 
> ## (crosstool-NG hg+default-9838aecd6340) 4.8.2
> ## (GNU Binutils) 2.23.2
> # 222996 Dec  1 15:18 u-boot.imx
> # 173844 Dec  1 15:34 u-boot.imx
> #export PATH=$PATH:$HOME/x-tools/arm-none-eabi/bin
> 
> ## (Sourcery G++ Lite 2009q1-161) 4.3.3
> ## (Sourcery G++ Lite 2009q1-161) 2.19.51.20090205
> # 218900 Dec  1 15:17 u-boot.imx - no CONFIG_SYS_LONGHELP
> # GCC does not produce working THUMB binaries.
> #export PATH=$PATH:/opt/arm-2009q1/bin
> NAME=arm-none-eabi-
> 
> ## (crosstool-NG 1.20.0) 4.9.2
> ## (crosstool-NG 1.20.0) 2.24
> # 222996 Dec  1 15:21 u-boot.imx
> # 173844 Dec  1 15:32 u-boot.imx
> #export PATH=$PATH:$HOME/x-tools/arm-none-linux-gnueabi-vybrid-4.9.2/bin/
> 
> ## (crosstool-NG hg+default-2a773bd38e71) 4.7.3
> ## (crosstool-NG hg+default-2a773bd38e71) 2.22
> # 222996 Dec  1 15:22 u-boot.imx
> # 173844 Dec  1 15:31 u-boot.imx
> #export PATH=$PATH:$HOME/x-tools/arm-none-linux-gnueabi-4.7.2/bin
> 
> ## (crosstool-NG hg+-c7fa97debb36) 4.6.3
> ## (crosstool-NG hg+-c7fa97debb36) 2.21.1
> # 227092 Dec  1 15:24 u-boot.imx
> # 173844 Dec  1 15:30 u-boot.imx
> #export PATH=$PATH:$HOME/x-tools/arm-none-linux-gnueabi.gcc4.6.3/bin
> #NAME=arm-none-linux-gnueabi-
> 
> ## (Timesys 20130708) 4.7.3
> ## (Timesys 20130708) 2.23.2
> # 227092 Dec  1 15:25 u-boot.imx
> # 173844 Dec  1 15:29 u-boot.imx
> #export PATH=$PATH:$HOME/x-tools/armv7l-timesys-linux-gnueabi/bin
> #NAME=armv7l-timesys-linux-gnueabi-
> 
> ## (crosstool-NG linaro-1.13.1-4.8-2013.10 - Linaro GCC 2013.10) 4.8.2 20131014
> ## (crosstool-NG linaro-1.13.1-4.8-2013.10 - Linaro GCC 2013.10)
> 2.23.2.20130610 Linaro 2013.10-4
> # 222996 Dec  1 15:26 u-boot.imx
> # 173844 Dec  1 15:28 u-boot.imx
> #export
> PATH=$PATH:$HOME/x-tools/gcc-linaro-arm-linux-gnueabihf-4.8-2013.10_linux/bin
> #NAME=arm-linux-gnueabihf-
> 
> make CROSS_COMPILE=$NAME vf610twr_config
> make CROSS_COMPILE=$NAME $* u-boot.imx
> cp u-boot.imx /srv/tftp
> 
> [end script]
> 
> I applied Stefan's memcpy/thumb patch, reverted c6150aaf and applied
> 'fs/ext4/ext4fs.c, fs/fs.c fs/fat/fat_write.c: Adjust 64bit math
> methods' by Tom Rini <trini@ti.com> (it has a little whitespace fat in
> it).  All of the images booted on a Vybrid Tower (Rev H) from an SD
> card.  Only issue is that I think the 'AFLAGS_NOWARN' can be removed as
> per below...
> 
> On 21 Nov 2014, stefan at agner.ch wrote:
> 
>> Resynchronize memcpy/memset with kernel and build them explicitly
>> in Thumb2 mode (unified syntax). Those assembler files can be
>> built and linked in ARM mode too, however when calling them from
>> Thumb2 built code, the stack got corrupted and the copy did not
>> succeed (the exact details have not been traced back). Hoever,
>> the Linux kernel builds those files in Thumb2 mode. Hence U-Boot
>> should build them in Thumb2 mode too when CONFIG_SYS_THUMB_BUILD
>> is set.
> 
>> To build the files without warning, some assembler instructions
>> had to be replaced with their UAL compliant variant (thanks
>> Jeroen for this input).
> 
>> To build the file in Thumb2 mode the implicit-it=always option need
>> to be set to generate Thumb2 compliant IT instructions where needed.
>> We add this option to the general AFLAGS when building for Thumb2.
> 
>> Signed-off-by: Stefan Agner <stefan@agner.ch>
>> ---
>> Changes since v1: - Don't set auto-it in AFLAGS - Removed
>> "no-warn-deprecated" warning suppression - Converted non-UAL assembler
>> instructions to their UAL compliant variants
> 
>> Tested in ARM and Thumb2 mode on Vybrid SoC. Disassembled the memset/
>> memcpy object files before and after converting the instructions to
>> UAL, the output was identical.
>>
>> arch/arm/config.mk | 4 +- arch/arm/include/asm/assembler.h | 33
>> ++++++++++-- arch/arm/lib/memcpy.S | 80 +++++++++++++++++++---------
>> arch/arm/lib/memset.S | 112 ++++++++++++++++++++------------------- 4
>> files changed, 142 insertions(+), 87 deletions(-)
>>
>> diff --git a/arch/arm/config.mk b/arch/arm/config.mk
>> index f0eafd6..14f4e9f 100644
>> --- a/arch/arm/config.mk
>> +++ b/arch/arm/config.mk
>> @@ -26,7 +26,9 @@ PLATFORM_CPPFLAGS += -D__ARM__
>>
>> # Choose between ARM/Thumb instruction sets ifeq
>> ($(CONFIG_SYS_THUMB_BUILD),y) -PF_CPPFLAGS_ARM := $(call cc-option,
>> -mthumb -mthumb-interwork,\ +AFLAGS_IMPLICIT_IT := $(call
>> as-option,-Wa$(comma)-mimplicit-it=always) +PF_CPPFLAGS_ARM :=
>> $(AFLAGS_IMPLICIT_IT) $(AFLAGS_NOWARN) \ + $(call cc-option, -mthumb
> 
> I don't think we need the '$(AFLAGS_NOWARN)' as you fixed that?  I
> removed it during my tests and didn't see anything.  Especially all
> builds produced output.  I guess this is copy/paste from a Linux
> Makefile as I don't even see where it is set in U-Boot.
> 

You are right, I removed the assignation of that but did not remove it's
inclucion into the PF_CPPFLAGS_ARM variable...

Can this be fixed by the merger or should I create a new revision?

--
Stefan

>> -mthumb-interwork,\ $(call cc-option,-marm,)\ $(call
>> cc-option,-mno-thumb-interwork,)\ ) diff --git
>> a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
>> index 5e4789b..11b80fb 100644 --- a/arch/arm/include/asm/assembler.h
>> +++ b/arch/arm/include/asm/assembler.h @@ -14,12 +14,14 @@ * assembler
>> source.  */
> 
> [snip]
> 
> Tested-by: Bill Pringlemeir <bpringlemeir@nbsps.com>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode
  2014-12-03 17:04   ` Stefan Agner
@ 2014-12-15 14:24     ` Andreas Färber
  2014-12-15 21:06       ` Stefan Agner
  0 siblings, 1 reply; 9+ messages in thread
From: Andreas Färber @ 2014-12-15 14:24 UTC (permalink / raw)
  To: u-boot

Hi Stefan,

Am 03.12.2014 um 18:04 schrieb Stefan Agner:
> Can this be fixed by the merger or should I create a new revision?

It looks as if this was neither applied nor respun? I have some more
patches to make CONFIG_USE_PRIVATE_LIBGCC build for Thumb that I would
like to rebase on it.

In particular, I have pieced together a tiny asm/unified.h file with
THUMB() and ARM() macros and doing the .syntax unified in that central
place, and resyncing the various arch/arm/lib/_*.S files from Linux.
https://github.com/afaerber/u-boot/commits/stm32

Is there a particular reason you are doing the .syntax inline here?

And looking beyond this patch, assuming this is for your Vybrid's M4, do
you already have a concept for how to go about vectors.S, start.S and
the like for ARMv7-M? :) I've stubbed them out for now but haven't
managed to successfully link yet...

Regards,
Andreas

-- 
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 N?rnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imend?rffer; HRB 21284 AG N?rnberg

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode
  2014-12-15 14:24     ` Andreas Färber
@ 2014-12-15 21:06       ` Stefan Agner
  2014-12-18 15:39         ` Albert ARIBAUD
  0 siblings, 1 reply; 9+ messages in thread
From: Stefan Agner @ 2014-12-15 21:06 UTC (permalink / raw)
  To: u-boot

Hi Andreas,

On 2014-12-15 15:24, Andreas F?rber wrote:
> Hi Stefan,
> 
> Am 03.12.2014 um 18:04 schrieb Stefan Agner:
>> Can this be fixed by the merger or should I create a new revision?
> 
> It looks as if this was neither applied nor respun? I have some more
> patches to make CONFIG_USE_PRIVATE_LIBGCC build for Thumb that I would
> like to rebase on it.

Not sure what the expectation is... Probably I should just do a respin
since there are now three minor nits...

> 
> In particular, I have pieced together a tiny asm/unified.h file with
> THUMB() and ARM() macros and doing the .syntax unified in that central
> place, and resyncing the various arch/arm/lib/_*.S files from Linux.
> https://github.com/afaerber/u-boot/commits/stm32
> 
> Is there a particular reason you are doing the .syntax inline here?

No particular reason other than laziness :-)

> 
> And looking beyond this patch, assuming this is for your Vybrid's M4, do
> you already have a concept for how to go about vectors.S, start.S and
> the like for ARMv7-M? :) I've stubbed them out for now but haven't
> managed to successfully link yet...

Actually no, I don't use U-Boot on the M4 (so far). For Linux on the
Vybrid M4, I use a special boot loader which runs on the A5 Linux. On
Vybrid (the variant we use at least) boots always from the A5 first,
hence I have a running Linux. Or alternatively, I could run the M4 from
U-Boot running on the A5. So far, I had no need to run U-Boot on the M4.

The reason I was interested in Thumb2 was the size: Due to lack of 5V on
the Colibri module we need to load the boot loader using Vybrid's serial
loader over UART which is somewhat slow. So the 25% reduction of size
means 25% less download time...

--
Stefan

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode
  2014-12-15 21:06       ` Stefan Agner
@ 2014-12-18 15:39         ` Albert ARIBAUD
  0 siblings, 0 replies; 9+ messages in thread
From: Albert ARIBAUD @ 2014-12-18 15:39 UTC (permalink / raw)
  To: u-boot

Hello Stefan,

On Mon, 15 Dec 2014 22:06:11 +0100, Stefan Agner <stefan@agner.ch>
wrote:
> Hi Andreas,
> 
> On 2014-12-15 15:24, Andreas F?rber wrote:
> > Hi Stefan,
> > 
> > Am 03.12.2014 um 18:04 schrieb Stefan Agner:
> >> Can this be fixed by the merger or should I create a new revision?
> > 
> > It looks as if this was neither applied nor respun? I have some more
> > patches to make CONFIG_USE_PRIVATE_LIBGCC build for Thumb that I would
> > like to rebase on it.
> 
> Not sure what the expectation is... Probably I should just do a respin
> since there are now three minor nits...

Agreed, and assuming the three nits are cared for, I'd take it as a
bugfix.

Amicalement,
-- 
Albert.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2014-12-18 15:39 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-11-21 15:34 [U-Boot] [PATCH v2] arm: build arch memset/memcpy in Thumb2 mode Stefan Agner
2014-11-27 23:02 ` Stefan Agner
2014-11-30 19:33 ` Simon Glass
2014-11-30 20:07   ` Stefan Agner
2014-12-03 15:44 ` Bill Pringlemeir
2014-12-03 17:04   ` Stefan Agner
2014-12-15 14:24     ` Andreas Färber
2014-12-15 21:06       ` Stefan Agner
2014-12-18 15:39         ` Albert ARIBAUD

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox