public inbox for u-boot@lists.denx.de
 help / color / mirror / Atom feed
* [U-Boot-Users] [PATCH] mips: Bring over optimized memset() routine from Linux.
@ 2008-06-04 19:44 Jason McMullan
  2008-06-10 10:51 ` Shinya Kuribayashi
  2008-07-05 22:32 ` Wolfgang Denk
  0 siblings, 2 replies; 6+ messages in thread
From: Jason McMullan @ 2008-06-04 19:44 UTC (permalink / raw)
  To: u-boot

This commit pulls over the memset() MIPS routine from Linux 2.6.26,
which provides a 10x to 20x speedup over the generic byte-at-a-time
routine. This is especially useful on platforms with manual ECC
scrubbing, that require all of memory to be written at least once
after a power cycle.
---
 include/asm-mips/string.h |    2 +-
 lib_mips/Makefile         |    2 +-
 lib_mips/memset.S         |  174 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 176 insertions(+), 2 deletions(-)
 create mode 100644 lib_mips/memset.S

diff --git a/include/asm-mips/string.h b/include/asm-mips/string.h
index 579a591..0df1463 100644
--- a/include/asm-mips/string.h
+++ b/include/asm-mips/string.h
@@ -27,7 +27,7 @@ extern int strcmp(__const__ char *__cs, __const__ char *__ct);
 #undef __HAVE_ARCH_STRNCMP
 extern int strncmp(__const__ char *__cs, __const__ char *__ct, __kernel_size_t __count);
 
-#undef __HAVE_ARCH_MEMSET
+#define __HAVE_ARCH_MEMSET
 extern void *memset(void *__s, int __c, __kernel_size_t __count);
 
 #undef __HAVE_ARCH_MEMCPY
diff --git a/lib_mips/Makefile b/lib_mips/Makefile
index 8176437..9149039 100644
--- a/lib_mips/Makefile
+++ b/lib_mips/Makefile
@@ -25,7 +25,7 @@ include $(TOPDIR)/config.mk
 
 LIB	= $(obj)lib$(ARCH).a
 
-SOBJS-y	+=
+SOBJS-y	+= memset.o
 
 COBJS-y	+= board.o
 COBJS-y	+= bootm.o
diff --git a/lib_mips/memset.S b/lib_mips/memset.S
new file mode 100644
index 0000000..f1c07d7
--- /dev/null
+++ b/lib_mips/memset.S
@@ -0,0 +1,174 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2007  Maciej W. Rozycki
+ */
+#include <asm/asm.h>
+//#include <asm/asm-offsets.h>
+#include <asm/regdef.h>
+
+#if LONGSIZE == 4	/* pick the partial (left/right) store pair that matches the size of a long */
+#define LONG_S_L swl
+#define LONG_S_R swr
+#else	/* LONGSIZE != 4 */
+#define LONG_S_L sdl
+#define LONG_S_R sdr
+#endif	/* LONGSIZE == 4 */
+
+#define EX(insn,reg,addr,handler)	/* emit one instruction plus an __ex_table record for it */	\
+9:	insn	reg, addr;		/* the instruction itself, tagged with local label 9 */	\
+	.section __ex_table,"a"; 	/* switch to the __ex_table section */	\
+	PTR	9b, handler; 		/* record the pair (address of 9b, handler) */	\
+	.previous	/* back to the previous section. NOTE(review): nothing in this file reads __ex_table; confirm whether any code consumes these records */
+
+	.macro	f_fill64 dst, offset, val, fixup	/* unrolled run of LONG_S stores of \val at \offset(\dst), one per LONGSIZE step; \fixup is the __ex_table handler passed to EX */
+	EX(LONG_S, \val, (\offset +  0 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  1 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  2 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  3 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  4 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  5 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  6 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  7 * LONGSIZE)(\dst), \fixup)
+#if LONGSIZE == 4	/* eight more stores so that the run still spans 16 * 4 = 64 bytes of offsets */
+	EX(LONG_S, \val, (\offset +  8 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  9 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup)
+#endif	/* LONGSIZE == 4 */
+	.endm	/* NOTE(review): the computed jump in .Lmemset_partial enters this sequence part-way, so the number and order of stores must not change */
+
+/*
+ * memset(void *s, int c, size_t n)
+ *
+ * a0: start of area to clear (copied to v0 as the return value)
+ * a1: char to fill with (its low byte is replicated across a long below)
+ * a2: size of area to clear, in bytes (set to zero before a normal return)
+ */
+	.set	noreorder			/* delay slots are filled by hand; they are marked with one extra leading space */
+	.align	5
+LEAF(memset)
+	beqz		a1, 1f			/* fill value is 0: nothing to spread */
+	 move		v0, a0			/* result */
+
+	andi		a1, 0xff		/* spread fillword */
+	LONG_SLL		t1, a1, 8
+	or		a1, t1			/* fill byte now in the two low bytes */
+	LONG_SLL		t1, a1, 16
+#if LONGSIZE == 8
+	or		a1, t1			/* fill byte now in the four low bytes */
+	LONG_SLL		t1, a1, 32
+#endif
+	or		a1, t1			/* fill byte now in every byte of the long */
+1:
+
+FEXPORT(__bzero)				/* second entry point: a1 is used as passed in and v0 is not set on this path */
+	sltiu		t0, a2, LONGSIZE	/* very small region? */
+	bnez		t0, .Lsmall_memset	/* fewer than LONGSIZE bytes: use the byte loop */
+	 andi		t0, a0, LONGMASK	/* aligned? */
+
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
+	beqz		t0, 1f			/* already long-aligned: skip the head store */
+	 PTR_SUBU	t0, LONGSIZE		/* alignment in bytes */
+#else	/* same sequence, but LONGSIZE is subtracted via AT instead of as an immediate */
+	.set		noat
+	li		AT, LONGSIZE
+	beqz		t0, 1f			/* already long-aligned: skip the head store */
+	 PTR_SUBU	t0, AT			/* alignment in bytes */
+	.set		at
+#endif
+
+	R10KCBARRIER(0(ra))			/* macro defined outside this file -- TODO confirm U-Boot's headers provide it */
+#ifdef __MIPSEB__
+	EX(LONG_S_L, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */
+#endif
+#ifdef __MIPSEL__
+	EX(LONG_S_R, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */
+#endif
+	PTR_SUBU	a0, t0			/* long align ptr */
+	PTR_ADDU	a2, t0			/* correct size */
+
+1:	ori		t1, a2, 0x3f		/* # of full blocks */
+	xori		t1, 0x3f		/* t1 = a2 with its low six bits cleared: bytes in whole 64-byte blocks */
+	beqz		t1, .Lmemset_partial	/* no block to fill */
+	 andi		t0, a2, 0x40-LONGSIZE	/* t0 = bytes in whole longs left over below 64 */
+
+	PTR_ADDU	t1, a0			/* end address */
+	.set		reorder			/* let the assembler fill the delay slot of the loop branch */
+1:	PTR_ADDIU	a0, 64			/* advance first; the stores below use offsets -64 .. -LONGSIZE */
+	R10KCBARRIER(0(ra))
+	f_fill64 a0, -64, a1, .Lfwd_fixup
+	bne		t1, a0, 1b		/* repeat until a0 reaches the end address in t1 */
+	.set		noreorder
+
+.Lmemset_partial:				/* fill the t0 bytes of whole longs that follow the last full block */
+	R10KCBARRIER(0(ra))
+	PTR_LA		t1, 2f			/* where to start */
+#if LONGSIZE == 4
+	PTR_SUBU	t1, t0			/* back up t0 bytes of code from 2: -- presumably one 4-byte store instruction per 4 bytes to fill */
+#else
+	.set		noat
+	LONG_SRL		AT, t0, 1	/* AT = t0 / 2 -- presumably because each 4-byte instruction stores 8 bytes */
+	PTR_SUBU	t1, AT
+	.set		at
+#endif
+	jr		t1			/* computed jump into the store sequence below */
+	 PTR_ADDU	a0, t0			/* dest ptr */
+
+	.set		push
+	.set		noreorder
+	.set		nomacro			/* presumably so that each store stays a single instruction for the computed jump -- confirm */
+	f_fill64 a0, -64, a1, .Lpartial_fixup	/* ... but first do longs ... */
+2:	.set		pop			/* restores the .set state saved by the push above */
+	andi		a2, LONGMASK		/* At most one long to go */
+
+	beqz		a2, 1f			/* no trailing bytes: return */
+	 PTR_ADDU	a0, a2			/* What's left */
+	R10KCBARRIER(0(ra))
+#ifdef __MIPSEB__
+	EX(LONG_S_R, a1, -1(a0), .Llast_fixup)	/* partial store addressed at the last byte of the area */
+#endif
+#ifdef __MIPSEL__
+	EX(LONG_S_L, a1, -1(a0), .Llast_fixup)	/* partial store addressed at the last byte of the area */
+#endif
+1:	jr		ra
+	 move		a2, zero		/* a2 = 0 on normal return */
+
+.Lsmall_memset:					/* byte-at-a-time path for areas shorter than LONGSIZE */
+	beqz		a2, 2f			/* zero length: nothing to store */
+	 PTR_ADDU	t1, a0, a2		/* t1 = end address */
+
+1:	PTR_ADDIU	a0, 1			/* fill bytewise */
+	R10KCBARRIER(0(ra))
+	bne		t1, a0, 1b
+	 sb		a1, -1(a0)		/* delay slot: store the byte just stepped over */
+
+2:	jr		ra			/* done */
+	 move		a2, zero		/* a2 = 0 on normal return */
+	END(memset)
+
+.Lfirst_fixup:				/* __ex_table handler named by the unaligned head store: just return */
+	jr	ra
+	 nop
+
+.Lfwd_fixup:				/* __ex_table handler named by the 64-byte block loop stores */
+	andi		a2, 0x3f		/* keep the byte count below one block */
+	jr		ra
+	LONG_ADDU	a2, t1			/* delay slot of jr (the earlier .set noreorder is still active), though it lacks the extra-space marker */
+
+.Lpartial_fixup:			/* __ex_table handler named by the partial-block stores */
+	andi		a2, LONGMASK		/* keep the byte count below one long */
+	jr		ra
+	LONG_ADDU	a2, t1			/* delay slot of jr (the earlier .set noreorder is still active), though it lacks the extra-space marker */
+
+.Llast_fixup:				/* __ex_table handler named by the unaligned tail store */
+	jr		ra
+	 andi		v1, a2, LONGMASK	/* result goes to v1 here, not a2 as in the handlers above. NOTE(review): what callers expect in a2/v1 after a fault is not visible in this file -- confirm */
-- 
1.5.4.3

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [U-Boot-Users] [PATCH] mips: Bring over optimized memset() routine from Linux.
  2008-06-04 19:44 [U-Boot-Users] [PATCH] mips: Bring over optimized memset() routine from Linux Jason McMullan
@ 2008-06-10 10:51 ` Shinya Kuribayashi
  2008-06-13  7:04   ` Shinya Kuribayashi
  2008-07-05 22:32 ` Wolfgang Denk
  1 sibling, 1 reply; 6+ messages in thread
From: Shinya Kuribayashi @ 2008-06-10 10:51 UTC (permalink / raw)
  To: u-boot

Hi Jason,

Jason McMullan wrote:
> This commit pulls over the memset() MIPS routine from Linux 2.6.26,
> which provides a 10x to 20x speedup over the generic byte-at-a-time
> routine. This is especially useful on platforms with manual ECC
> scrubbing, that require all of memory to be written at least once
> after a power cycle.
> ---
>  include/asm-mips/string.h |    2 +-
>  lib_mips/Makefile         |    2 +-
>  lib_mips/memset.S         |  174 +++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 176 insertions(+), 2 deletions(-)
>  create mode 100644 lib_mips/memset.S

IIRC, Linux's memset relies on AdEL/AdES exceptions. We have Status.EXL
enabled, but don't have proper exception handlers yet. So my question
is: does this code always work as expected, or does it only work under
some alignment restrictions?

And some nitpicks. See below.

> diff --git a/lib_mips/memset.S b/lib_mips/memset.S
> new file mode 100644
> index 0000000..f1c07d7
> --- /dev/null
> +++ b/lib_mips/memset.S
> @@ -0,0 +1,174 @@
> +/*
> + * This file is subject to the terms and conditions of the GNU General Public
> + * License.  See the file "COPYING" in the main directory of this archive
> + * for more details.
> + *
> + * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
> + * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
> + * Copyright (C) 2007  Maciej W. Rozycki
> + */
> +#include <asm/asm.h>
> +//#include <asm/asm-offsets.h>

Please remove unused #include. Even '#if 0'-ing is not allowed in
U-Boot policy.

> +#include <asm/regdef.h>
> +
> +#if LONGSIZE == 4
> +#define LONG_S_L swl
> +#define LONG_S_R swr
> +#else
> +#define LONG_S_L sdl
> +#define LONG_S_R sdr
> +#endif
> +
> +#define EX(insn,reg,addr,handler)			\
> +9:	insn	reg, addr;				\
> +	.section __ex_table,"a"; 			\
> +	PTR	9b, handler; 				\
> +	.previous
> +
> +	.macro	f_fill64 dst, offset, val, fixup
> +	EX(LONG_S, \val, (\offset +  0 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset +  1 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset +  2 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset +  3 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset +  4 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset +  5 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset +  6 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset +  7 * LONGSIZE)(\dst), \fixup)
> +#if LONGSIZE == 4
> +	EX(LONG_S, \val, (\offset +  8 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset +  9 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup)
> +	EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup)
> +#endif
> +	.endm
> +
> +/*
> + * memset(void *s, int c, size_t n)
> + *
> + * a0: start of area to clear
> + * a1: char to fill with
> + * a2: size of area to clear
> + */
> +	.set	noreorder
> +	.align	5
> +LEAF(memset)
> +	beqz		a1, 1f
> +	 move		v0, a0			/* result */

        ^

> +	andi		a1, 0xff		/* spread fillword */
> +	LONG_SLL		t1, a1, 8
> +	or		a1, t1
> +	LONG_SLL		t1, a1, 16
> +#if LONGSIZE == 8
> +	or		a1, t1
> +	LONG_SLL		t1, a1, 32
> +#endif
> +	or		a1, t1
> +1:
> +
> +FEXPORT(__bzero)
> +	sltiu		t0, a2, LONGSIZE	/* very small region? */
> +	bnez		t0, .Lsmall_memset
> +	 andi		t0, a0, LONGMASK	/* aligned? */

        ^

[further part snipped]

Please fix wrong indentations with proper tabs. I know this is exactly
the same as Linux's memset, but we prefer to fix it correctly in U-Boot.

[ I used to do it the way you did, but changed my mind. Now I think this
  is better practice. Indentation that differs from Linux is not a big
  deal IMO; diff's -w option simply ignores it. ]

Thanks in advance,

-- 
Shinya Kuribayashi
NEC Electronics

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [U-Boot-Users] [PATCH] mips: Bring over optimized memset() routine from Linux.
  2008-06-10 10:51 ` Shinya Kuribayashi
@ 2008-06-13  7:04   ` Shinya Kuribayashi
  2008-06-13  7:13     ` Wolfgang Denk
  0 siblings, 1 reply; 6+ messages in thread
From: Shinya Kuribayashi @ 2008-06-13  7:04 UTC (permalink / raw)
  To: u-boot

Shinya Kuribayashi wrote:
>> +	andi		a1, 0xff		/* spread fillword */
>> +	LONG_SLL		t1, a1, 8
>> +	or		a1, t1
>> +	LONG_SLL		t1, a1, 16
>> +#if LONGSIZE == 8
>> +	or		a1, t1
>> +	LONG_SLL		t1, a1, 32
>> +#endif
>> +	or		a1, t1
>> +1:
>> +
>> +FEXPORT(__bzero)
>> +	sltiu		t0, a2, LONGSIZE	/* very small region? */
>> +	bnez		t0, .Lsmall_memset
>> +	 andi		t0, a0, LONGMASK	/* aligned? */
> 
>         ^
> 
> [further part snipped]
> 
> Please fix wrong indentations with proper tabs. I know this is exactly
> the same as Linux's memset, but we prefer to fix it correctly in U-Boot.

I found that above is an intended space to indicate that the instruction
is in the delay slot. I think it's probably a good old convention in
MIPS assembly programming, and would like to leave it as it is, IMHO.

Anyway, sorry for my ignorance and please ignore my comments on this.

-- 
Shinya Kuribayashi
NEC Electronics

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [U-Boot-Users] [PATCH] mips: Bring over optimized memset() routine from Linux.
  2008-06-13  7:04   ` Shinya Kuribayashi
@ 2008-06-13  7:13     ` Wolfgang Denk
  0 siblings, 0 replies; 6+ messages in thread
From: Wolfgang Denk @ 2008-06-13  7:13 UTC (permalink / raw)
  To: u-boot

In message <48521C14.4010504@necel.com> you wrote:
>
> I found that above is an intended space to indicate that the instruction
> is in the delay slot. I think it's probably a good old convention in
> MIPS assembly programming, and would like to leave it as it is, IMHO.

Indeed. If it has a deeper meaning, this should be left as is.

> Anyway, sorry for my ignorance and please ignore my comments on this.

Thanks for the explanation - I think most of us were not aware of any
such conventions. Speaking for me - I definitely was not.

Best regards,

Wolfgang Denk

-- 
DENX Software Engineering GmbH,     MD: Wolfgang Denk & Detlev Zundel
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd at denx.de
Time is fluid ... like a river with currents, eddies, backwash.
	-- Spock, "The City on the Edge of Forever", stardate 3134.0

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [U-Boot-Users] [PATCH] mips: Bring over optimized memset() routine from Linux.
  2008-06-04 19:44 [U-Boot-Users] [PATCH] mips: Bring over optimized memset() routine from Linux Jason McMullan
  2008-06-10 10:51 ` Shinya Kuribayashi
@ 2008-07-05 22:32 ` Wolfgang Denk
  2008-07-07 13:50   ` [U-Boot-Users] [PATCH] mips: Bring over optimized memset()routine " McMullan, Jason
  1 sibling, 1 reply; 6+ messages in thread
From: Wolfgang Denk @ 2008-07-05 22:32 UTC (permalink / raw)
  To: u-boot

In message <20080604194815.A02FD6E7BD@mcmullan-linux.hq.netapp.com> you wrote:
> This commit pulls over the memset() MIPS routine from Linux 2.6.26,
> which provides a 10x to 20x speedup over the generic byte-at-a-time
> routine. This is especially useful on platforms with manual ECC
> scrubbing, that require all of memory to be written at least once
> after a power cycle.
> ---
>  include/asm-mips/string.h |    2 +-
>  lib_mips/Makefile         |    2 +-
>  lib_mips/memset.S         |  174 +++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 176 insertions(+), 2 deletions(-)
>  create mode 100644 lib_mips/memset.S

Shinya Kuribayashi asked some questions about your patch, which you
did not answer (as far as I can tell).

Do you intend to comment on the questions and/or submit a cleaned up
version of the patch?

Best regards,

Wolfgang Denk

-- 
DENX Software Engineering GmbH,     MD: Wolfgang Denk & Detlev Zundel
HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany
Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd at denx.de
"You can have my Unix system when you  pry  it  from  my  cold,  dead
fingers."                                                - Cal Keegan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [U-Boot-Users] [PATCH] mips: Bring over optimized memset()routine from Linux.
  2008-07-05 22:32 ` Wolfgang Denk
@ 2008-07-07 13:50   ` McMullan, Jason
  0 siblings, 0 replies; 6+ messages in thread
From: McMullan, Jason @ 2008-07-07 13:50 UTC (permalink / raw)
  To: u-boot

On Sun, 2008-07-06 at 00:32 +0200, Wolfgang Denk wrote:
> In message <20080604194815.A02FD6E7BD@mcmullan-linux.hq.netapp.com> you wrote:
> > This commit pulls over the memset() MIPS routine from Linux 2.6.26,
> > which provides a 10x to 20x speedup over the generic byte-at-a-time
> > routine. This is especially useful on platforms with manual ECC
> > scrubbing, that require all of memory to be written at least once
> > after a power cycle.
> Do you intend to comment on the questions and/or submit a cleaned up
> version of the patch?

Unfortunately, no follow-up patch is forthcoming.

I was able to use a spare DMA engine on our SOC to perform the memory
zeroing, which eliminated the need for the enhanced memset() routine.

Also, I am not familiar with the intricacies of MIPS exception handling
for alignment issues, so I was not able to come up with a good solution
for Shinya Kuribayashi's alignment trap issue questions.

Please retract the patch.

Jason McMullan
MTS SW
System Firmware

NetApp
724.741.5011    Fax
724.741.5166    Direct
412.656.3519    Mobile
jason.mcmullan at netapp.com
www.netapp.com


-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: This is a digitally signed message part
Url : http://lists.denx.de/pipermail/u-boot/attachments/20080707/0141f089/attachment.pgp 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2008-07-07 13:50 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-06-04 19:44 [U-Boot-Users] [PATCH] mips: Bring over optimized memset() routine from Linux Jason McMullan
2008-06-10 10:51 ` Shinya Kuribayashi
2008-06-13  7:04   ` Shinya Kuribayashi
2008-06-13  7:13     ` Wolfgang Denk
2008-07-05 22:32 ` Wolfgang Denk
2008-07-07 13:50   ` [U-Boot-Users] [PATCH] mips: Bring over optimized memset()routine " McMullan, Jason

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox