linux-arch.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
To: linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: arnd@arndb.de, Vineet Gupta <Vineet.Gupta1@synopsys.com>,
	Joern Rennecke <joern.rennecke@embecosm.com>
Subject: [PATCH v2 12/76] ARC: String library
Date: Fri, 18 Jan 2013 17:54:26 +0530	[thread overview]
Message-ID: <1358511930-7424-13-git-send-email-vgupta@synopsys.com> (raw)
In-Reply-To: <1358511930-7424-1-git-send-email-vgupta@synopsys.com>

Hand optimised asm code for ARC700 pipeline.
Originally written/optimized by Joern Rennecke

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Joern Rennecke <joern.rennecke@embecosm.com>
---
 arch/arc/include/asm/string.h |   40 +++++++++++++
 arch/arc/lib/memcmp.S         |  124 +++++++++++++++++++++++++++++++++++++++++
 arch/arc/lib/memcpy-700.S     |   66 ++++++++++++++++++++++
 arch/arc/lib/memset.S         |   59 +++++++++++++++++++
 arch/arc/lib/strchr-700.S     |  123 ++++++++++++++++++++++++++++++++++++++++
 arch/arc/lib/strcmp.S         |   96 +++++++++++++++++++++++++++++++
 arch/arc/lib/strcpy-700.S     |   70 +++++++++++++++++++++++
 arch/arc/lib/strlen.S         |   83 +++++++++++++++++++++++++++
 8 files changed, 661 insertions(+), 0 deletions(-)
 create mode 100644 arch/arc/include/asm/string.h
 create mode 100644 arch/arc/lib/memcmp.S
 create mode 100644 arch/arc/lib/memcpy-700.S
 create mode 100644 arch/arc/lib/memset.S
 create mode 100644 arch/arc/lib/strchr-700.S
 create mode 100644 arch/arc/lib/strcmp.S
 create mode 100644 arch/arc/lib/strcpy-700.S
 create mode 100644 arch/arc/lib/strlen.S

diff --git a/arch/arc/include/asm/string.h b/arch/arc/include/asm/string.h
new file mode 100644
index 0000000..87676c8
--- /dev/null
+++ b/arch/arc/include/asm/string.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: May 2011
+ *  -We had half-optimised memset/memcpy, got better versions of those
+ *  -Added memcmp, strchr, strcpy, strcmp, strlen
+ *
+ * Amit Bhor: Codito Technologies 2004
+ */
+
+#ifndef _ASM_ARC_STRING_H
+#define _ASM_ARC_STRING_H
+
+#include <linux/types.h>
+
+#ifdef __KERNEL__
+
+#define __HAVE_ARCH_MEMSET
+#define __HAVE_ARCH_MEMCPY
+#define __HAVE_ARCH_MEMCMP
+#define __HAVE_ARCH_STRCHR
+#define __HAVE_ARCH_STRCPY
+#define __HAVE_ARCH_STRCMP
+#define __HAVE_ARCH_STRLEN
+
+extern void *memset(void *ptr, int, __kernel_size_t);
+extern void *memcpy(void *, const void *, __kernel_size_t);
+extern void memzero(void *ptr, __kernel_size_t n);
+extern int memcmp(const void *, const void *, __kernel_size_t);
+extern char *strchr(const char *s, int c);
+extern char *strcpy(char *dest, const char *src);
+extern int strcmp(const char *cs, const char *ct);
+extern __kernel_size_t strlen(const char *);
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_ARC_STRING_H */
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
new file mode 100644
index 0000000..bc813d5
--- /dev/null
+++ b/arch/arc/lib/memcmp.S
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+#ifdef __LITTLE_ENDIAN__
+#define WORD2 r2
+#define SHIFT r3
+#else /* BIG ENDIAN */
+#define WORD2 r3
+#define SHIFT r2
+#endif
+
+ARC_ENTRY memcmp
+	or	r12,r0,r1
+	asl_s	r12,r12,30
+	sub	r3,r2,1
+	brls	r2,r12,.Lbytewise
+	ld	r4,[r0,0]
+	ld	r5,[r1,0]
+	lsr.f	lp_count,r3,3
+	lpne	.Loop_end
+	ld_s	WORD2,[r0,4]
+	ld_s	r12,[r1,4]
+	brne	r4,r5,.Leven
+	ld.a	r4,[r0,8]
+	ld.a	r5,[r1,8]
+	brne	WORD2,r12,.Lodd
+.Loop_end:
+	asl_s	SHIFT,SHIFT,3
+	bhs_s	.Last_cmp
+	brne	r4,r5,.Leven
+	ld	r4,[r0,4]
+	ld	r5,[r1,4]
+#ifdef __LITTLE_ENDIAN__
+	nop_s
+	; one more load latency cycle
+.Last_cmp:
+	xor	r0,r4,r5
+	bset	r0,r0,SHIFT
+	sub_s	r1,r0,1
+	bic_s	r1,r1,r0
+	norm	r1,r1
+	b.d	.Leven_cmp
+	and	r1,r1,24
+.Leven:
+	xor	r0,r4,r5
+	sub_s	r1,r0,1
+	bic_s	r1,r1,r0
+	norm	r1,r1
+	; slow track insn
+	and	r1,r1,24
+.Leven_cmp:
+	asl	r2,r4,r1
+	asl	r12,r5,r1
+	lsr_s	r2,r2,1
+	lsr_s	r12,r12,1
+	j_s.d	[blink]
+	sub	r0,r2,r12
+	.balign	4
+.Lodd:
+	xor	r0,WORD2,r12
+	sub_s	r1,r0,1
+	bic_s	r1,r1,r0
+	norm	r1,r1
+	; slow track insn
+	and	r1,r1,24
+	asl_s	r2,r2,r1
+	asl_s	r12,r12,r1
+	lsr_s	r2,r2,1
+	lsr_s	r12,r12,1
+	j_s.d	[blink]
+	sub	r0,r2,r12
+#else /* BIG ENDIAN */
+.Last_cmp:
+	neg_s	SHIFT,SHIFT
+	lsr	r4,r4,SHIFT
+	lsr	r5,r5,SHIFT
+	; slow track insn
+.Leven:
+	sub.f	r0,r4,r5
+	mov.ne	r0,1
+	j_s.d	[blink]
+	bset.cs	r0,r0,31
+.Lodd:
+	cmp_s	WORD2,r12
+
+	mov_s	r0,1
+	j_s.d	[blink]
+	bset.cs	r0,r0,31
+#endif /* ENDIAN */
+	.balign	4
+.Lbytewise:
+	breq	r2,0,.Lnil
+	ldb	r4,[r0,0]
+	ldb	r5,[r1,0]
+	lsr.f	lp_count,r3
+	lpne	.Lbyte_end
+	ldb_s	r3,[r0,1]
+	ldb	r12,[r1,1]
+	brne	r4,r5,.Lbyte_even
+	ldb.a	r4,[r0,2]
+	ldb.a	r5,[r1,2]
+	brne	r3,r12,.Lbyte_odd
+.Lbyte_end:
+	bcc	.Lbyte_even
+	brne	r4,r5,.Lbyte_even
+	ldb_s	r3,[r0,1]
+	ldb_s	r12,[r1,1]
+.Lbyte_odd:
+	j_s.d	[blink]
+	sub	r0,r3,r12
+.Lbyte_even:
+	j_s.d	[blink]
+	sub	r0,r4,r5
+.Lnil:
+	j_s.d	[blink]
+	mov	r0,0
+ARC_EXIT memcmp
diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S
new file mode 100644
index 0000000..b64cc10
--- /dev/null
+++ b/arch/arc/lib/memcpy-700.S
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY memcpy
+	or	r3,r0,r1
+	asl_s	r3,r3,30
+	mov_s	r5,r0
+	brls.d	r2,r3,.Lcopy_bytewise
+	sub.f	r3,r2,1
+	ld_s	r12,[r1,0]
+	asr.f	lp_count,r3,3
+	bbit0.d	r3,2,.Lnox4
+	bmsk_s	r2,r2,1
+	st.ab	r12,[r5,4]
+	ld.a	r12,[r1,4]
+.Lnox4:
+	lppnz	.Lendloop
+	ld_s	r3,[r1,4]
+	st.ab	r12,[r5,4]
+	ld.a	r12,[r1,8]
+	st.ab	r3,[r5,4]
+.Lendloop:
+	breq	r2,0,.Last_store
+	ld	r3,[r5,0]
+#ifdef __LITTLE_ENDIAN__
+	add3	r2,-1,r2
+	; uses long immediate
+	xor_s	r12,r12,r3
+	bmsk	r12,r12,r2
+    xor_s	r12,r12,r3
+#else /* BIG ENDIAN */
+	sub3	r2,31,r2
+	; uses long immediate
+        xor_s	r3,r3,r12
+        bmsk	r3,r3,r2
+        xor_s	r12,r12,r3
+#endif /* ENDIAN */
+.Last_store:
+	j_s.d	[blink]
+	st	r12,[r5,0]
+
+	.balign	4
+.Lcopy_bytewise:
+	jcs	[blink]
+	ldb_s	r12,[r1,0]
+	lsr.f	lp_count,r3
+	bhs_s	.Lnox1
+	stb.ab	r12,[r5,1]
+	ldb.a	r12,[r1,1]
+.Lnox1:
+	lppnz	.Lendbloop
+	ldb_s	r3,[r1,1]
+	stb.ab	r12,[r5,1]
+	ldb.a	r12,[r1,2]
+	stb.ab	r3,[r5,1]
+.Lendbloop:
+	j_s.d	[blink]
+	stb	r12,[r5,0]
+ARC_EXIT memcpy
diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S
new file mode 100644
index 0000000..9b2d88d
--- /dev/null
+++ b/arch/arc/lib/memset.S
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+#define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues.  */
+
+ARC_ENTRY memset
+	mov_s	r4,r0
+	or	r12,r0,r2
+	bmsk.f	r12,r12,1
+	extb_s	r1,r1
+	asl	r3,r1,8
+	beq.d	.Laligned
+	or_s	r1,r1,r3
+	brls	r2,SMALL,.Ltiny
+	add	r3,r2,r0
+	stb	r1,[r3,-1]
+	bclr_s	r3,r3,0
+	stw	r1,[r3,-2]
+	bmsk.f	r12,r0,1
+	add_s	r2,r2,r12
+	sub.ne	r2,r2,4
+	stb.ab	r1,[r4,1]
+	and	r4,r4,-2
+	stw.ab	r1,[r4,2]
+	and	r4,r4,-4
+.Laligned:	; This code address should be aligned for speed.
+	asl	r3,r1,16
+	lsr.f	lp_count,r2,2
+	or_s	r1,r1,r3
+	lpne	.Loop_end
+	st.ab	r1,[r4,4]
+.Loop_end:
+	j_s	[blink]
+
+	.balign	4
+.Ltiny:
+	mov.f	lp_count,r2
+	lpne	.Ltiny_end
+	stb.ab	r1,[r4,1]
+.Ltiny_end:
+	j_s	[blink]
+ARC_EXIT memset
+
+; memzero: @r0 = mem, @r1 = size_t
+; memset:  @r0 = mem, @r1 = char, @r2 = size_t
+
+ARC_ENTRY memzero
+    ; adjust bzero args to memset args
+    mov r2, r1
+    mov r1, 0
+    b  memset    ;tail call so need to tinker with blink
+ARC_EXIT memzero
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S
new file mode 100644
index 0000000..99c1047
--- /dev/null
+++ b/arch/arc/lib/strchr-700.S
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* ARC700 has a relatively long pipeline and branch prediction, so we want
+   to avoid branches that are hard to predict.  On the other hand, the
+   presence of the norm instruction makes it easier to operate on whole
+   words branch-free.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strchr
+	extb_s	r1,r1
+	asl	r5,r1,8
+	bmsk	r2,r0,1
+	or	r5,r5,r1
+	mov_s	r3,0x01010101
+	breq.d	r2,r0,.Laligned
+	asl	r4,r5,16
+	sub_s	r0,r0,r2
+	asl	r7,r2,3
+	ld_s	r2,[r0]
+#ifdef __LITTLE_ENDIAN__
+	asl	r7,r3,r7
+#else
+	lsr	r7,r3,r7
+#endif
+	or	r5,r5,r4
+	ror	r4,r3
+	sub	r12,r2,r7
+	bic_s	r12,r12,r2
+	and	r12,r12,r4
+	brne.d	r12,0,.Lfound0_ua
+	xor	r6,r2,r5
+	ld.a	r2,[r0,4]
+	sub	r12,r6,r7
+	bic	r12,r12,r6
+	and	r7,r12,r4
+	breq	r7,0,.Loop ; For speed, we want this branch to be unaligned.
+	b	.Lfound_char ; Likewise this one.
+; /* We require this code address to be unaligned for speed...  */
+.Laligned:
+	ld_s	r2,[r0]
+	or	r5,r5,r4
+	ror	r4,r3
+; /* ... so that this code address is aligned, for itself and ...  */
+.Loop:
+	sub	r12,r2,r3
+	bic_s	r12,r12,r2
+	and	r12,r12,r4
+	brne.d	r12,0,.Lfound0
+	xor	r6,r2,r5
+	ld.a	r2,[r0,4]
+	sub	r12,r6,r3
+	bic	r12,r12,r6
+	and	r7,r12,r4
+	breq	r7,0,.Loop /* ... so that this branch is unaligned.  */
+	; Found searched-for character.  r0 has already advanced to next word.
+#ifdef __LITTLE_ENDIAN__
+/* We only need the information about the first matching byte
+   (i.e. the least significant matching byte) to be exact,
+   hence there is no problem with carry effects.  */
+.Lfound_char:
+	sub	r3,r7,1
+	bic	r3,r3,r7
+	norm	r2,r3
+	sub_s	r0,r0,1
+	asr_s	r2,r2,3
+	j.d	[blink]
+	sub_s	r0,r0,r2
+
+	.balign	4
+.Lfound0_ua:
+	mov	r3,r7
+.Lfound0:
+	sub	r3,r6,r3
+	bic	r3,r3,r6
+	and	r2,r3,r4
+	or_s	r12,r12,r2
+	sub_s	r3,r12,1
+	bic_s	r3,r3,r12
+	norm	r3,r3
+	add_s	r0,r0,3
+	asr_s	r12,r3,3
+	asl.f	0,r2,r3
+	sub_s	r0,r0,r12
+	j_s.d	[blink]
+	mov.pl	r0,0
+#else /* BIG ENDIAN */
+.Lfound_char:
+	lsr	r7,r7,7
+
+	bic	r2,r7,r6
+	norm	r2,r2
+	sub_s	r0,r0,4
+	asr_s	r2,r2,3
+	j.d	[blink]
+	add_s	r0,r0,r2
+
+.Lfound0_ua:
+	mov_s	r3,r7
+.Lfound0:
+	asl_s	r2,r2,7
+	or	r7,r6,r4
+	bic_s	r12,r12,r2
+	sub	r2,r7,r3
+	or	r2,r2,r6
+	bic	r12,r2,r12
+	bic.f	r3,r4,r12
+	norm	r3,r3
+
+	add.pl	r3,r3,1
+	asr_s	r12,r3,3
+	asl.f	0,r2,r3
+	add_s	r0,r0,r12
+	j_s.d	[blink]
+	mov.mi	r0,0
+#endif /* ENDIAN */
+ARC_EXIT strchr
diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S
new file mode 100644
index 0000000..5dc802b
--- /dev/null
+++ b/arch/arc/lib/strcmp.S
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* This is optimized primarily for the ARC700.
+   It would be possible to speed up the loops by one cycle / word
+   respective one cycle / byte by forcing double source 1 alignment, unrolling
+   by a factor of two, and speculatively loading the second word / byte of
+   source 1; however, that would increase the overhead for loop setup / finish,
+   and strcmp might often terminate early.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strcmp
+	or	r2,r0,r1
+	bmsk_s	r2,r2,1
+	brne	r2,0,.Lcharloop
+	mov_s	r12,0x01010101
+	ror	r5,r12
+.Lwordloop:
+	ld.ab	r2,[r0,4]
+	ld.ab	r3,[r1,4]
+	nop_s
+	sub	r4,r2,r12
+	bic	r4,r4,r2
+	and	r4,r4,r5
+	brne	r4,0,.Lfound0
+	breq	r2,r3,.Lwordloop
+#ifdef	__LITTLE_ENDIAN__
+	xor	r0,r2,r3	; mask for difference
+	sub_s	r1,r0,1
+	bic_s	r0,r0,r1	; mask for least significant difference bit
+	sub	r1,r5,r0
+	xor	r0,r5,r1	; mask for least significant difference byte
+	and_s	r2,r2,r0
+	and_s	r3,r3,r0
+#endif /* LITTLE ENDIAN */
+	cmp_s	r2,r3
+	mov_s	r0,1
+	j_s.d	[blink]
+	bset.lo	r0,r0,31
+
+	.balign	4
+#ifdef __LITTLE_ENDIAN__
+.Lfound0:
+	xor	r0,r2,r3	; mask for difference
+	or	r0,r0,r4	; or in zero indicator
+	sub_s	r1,r0,1
+	bic_s	r0,r0,r1	; mask for least significant difference bit
+	sub	r1,r5,r0
+	xor	r0,r5,r1	; mask for least significant difference byte
+	and_s	r2,r2,r0
+	and_s	r3,r3,r0
+	sub.f	r0,r2,r3
+	mov.hi	r0,1
+	j_s.d	[blink]
+	bset.lo	r0,r0,31
+#else /* BIG ENDIAN */
+	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
+	   because of carry-propagateion from a lower significant zero byte.
+	   We can compensate for this by checking that bit0 is zero.
+	   This compensation is not necessary in the step where we
+	   get a low estimate for r2, because in any affected bytes
+	   we already have 0x00 or 0x01, which will remain unchanged
+	   when bit 7 is cleared.  */
+	.balign	4
+.Lfound0:
+	lsr	r0,r4,8
+	lsr_s	r1,r2
+	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
+	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
+	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
+	cmp_s	r3,r2		; ... be independent of trailing garbage
+	or_s	r2,r2,r0	; likewise for r3 > r2
+	bic_s	r3,r3,r0
+	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
+	cmp_s	r2,r3
+	j_s.d	[blink]
+	bset.lo	r0,r0,31
+#endif /* ENDIAN */
+
+	.balign	4
+.Lcharloop:
+	ldb.ab	r2,[r0,1]
+	ldb.ab	r3,[r1,1]
+	nop_s
+	breq	r2,0,.Lcmpend
+	breq	r2,r3,.Lcharloop
+.Lcmpend:
+	j_s.d	[blink]
+	sub	r0,r2,r3
+ARC_EXIT strcmp
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
new file mode 100644
index 0000000..b7ca4ae
--- /dev/null
+++ b/arch/arc/lib/strcpy-700.S
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
+   If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
+   it 8 byte aligned.  Thus, we can do a little read-ahead, without
+   dereferencing a cache line that we should not touch.
+   Note that short and long instructions have been scheduled to avoid
+   branch stalls.
+   The beq_s to r3z could be made unaligned & long to avoid a stall
+   there, but the it is not likely to be taken often, and it
+   would also be likey to cost an unaligned mispredict at the next call.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strcpy
+	or	r2,r0,r1
+	bmsk_s	r2,r2,1
+	brne.d	r2,0,charloop
+	mov_s	r10,r0
+	ld_s	r3,[r1,0]
+	mov	r8,0x01010101
+	bbit0.d	r1,2,loop_start
+	ror	r12,r8
+	sub	r2,r3,r8
+	bic_s	r2,r2,r3
+	tst_s	r2,r12
+	bne	r3z
+	mov_s	r4,r3
+	.balign 4
+loop:
+	ld.a	r3,[r1,4]
+	st.ab	r4,[r10,4]
+loop_start:
+	ld.a	r4,[r1,4]
+	sub	r2,r3,r8
+	bic_s	r2,r2,r3
+	tst_s	r2,r12
+	bne_s	r3z
+	st.ab	r3,[r10,4]
+	sub	r2,r4,r8
+	bic	r2,r2,r4
+	tst	r2,r12
+	beq	loop
+	mov_s	r3,r4
+#ifdef __LITTLE_ENDIAN__
+r3z:	bmsk.f	r1,r3,7
+	lsr_s	r3,r3,8
+#else
+r3z:	lsr.f	r1,r3,24
+	asl_s	r3,r3,8
+#endif
+	bne.d	r3z
+	stb.ab	r1,[r10,1]
+	j_s	[blink]
+
+	.balign	4
+charloop:
+	ldb.ab	r3,[r1,1]
+
+
+	brne.d	r3,0,charloop
+	stb.ab	r3,[r10,1]
+	j	[blink]
+ARC_EXIT strcpy
diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S
new file mode 100644
index 0000000..39759e0
--- /dev/null
+++ b/arch/arc/lib/strlen.S
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strlen
+	or	r3,r0,7
+	ld	r2,[r3,-7]
+	ld.a	r6,[r3,-3]
+	mov	r4,0x01010101
+	; uses long immediate
+#ifdef __LITTLE_ENDIAN__
+	asl_s	r1,r0,3
+	btst_s	r0,2
+	asl	r7,r4,r1
+	ror	r5,r4
+	sub	r1,r2,r7
+	bic_s	r1,r1,r2
+	mov.eq	r7,r4
+	sub	r12,r6,r7
+	bic	r12,r12,r6
+	or.eq	r12,r12,r1
+	and	r12,r12,r5
+	brne	r12,0,.Learly_end
+#else /* BIG ENDIAN */
+	ror	r5,r4
+	btst_s	r0,2
+	mov_s	r1,31
+	sub3	r7,r1,r0
+	sub	r1,r2,r4
+	bic_s	r1,r1,r2
+	bmsk	r1,r1,r7
+	sub	r12,r6,r4
+	bic	r12,r12,r6
+	bmsk.ne	r12,r12,r7
+	or.eq	r12,r12,r1
+	and	r12,r12,r5
+	brne	r12,0,.Learly_end
+#endif /* ENDIAN */
+
+.Loop:
+	ld_s	r2,[r3,4]
+	ld.a	r6,[r3,8]
+	; stall for load result
+	sub	r1,r2,r4
+	bic_s	r1,r1,r2
+	sub	r12,r6,r4
+	bic	r12,r12,r6
+	or	r12,r12,r1
+	and	r12,r12,r5
+	breq r12,0,.Loop
+.Lend:
+	and.f	r1,r1,r5
+	sub.ne	r3,r3,4
+	mov.eq	r1,r12
+#ifdef __LITTLE_ENDIAN__
+	sub_s	r2,r1,1
+	bic_s	r2,r2,r1
+	norm	r1,r2
+	sub_s	r0,r0,3
+	lsr_s	r1,r1,3
+	sub	    r0,r3,r0
+	j_s.d	[blink]
+	sub	    r0,r0,r1
+#else /* BIG ENDIAN */
+	lsr_s	r1,r1,7
+	mov.eq	r2,r6
+	bic_s	r1,r1,r2
+	norm	r1,r1
+	sub	    r0,r3,r0
+	lsr_s	r1,r1,3
+	j_s.d	[blink]
+	add	    r0,r0,r1
+#endif /* ENDIAN */
+.Learly_end:
+	b.d	.Lend
+	sub_s.ne r1,r1,r1
+ARC_EXIT strlen
-- 
1.7.4.1

  parent reply	other threads:[~2013-01-18 12:27 UTC|newest]

Thread overview: 232+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-01-18 12:24 [PATCH v2 00/76] Synopsys ARC Linux kernel Port Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 01/76] ARC: Generic Headers Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 02/76] ARC: irqflags - Interrupt enabling/disabling at in-core intc Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 03/76] ARC: Atomic/bitops/cmpxchg/barriers Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 04/76] asm-generic headers: uaccess.h to conditionally define segment_eq() Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 05/76] ARC: uaccess friends Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 06/76] asm-generic: uaccess: Allow arches to over-ride __{get,put}_user_fn() Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 07/76] ARC: [optim] uaccess __{get,put}_user() optimised Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 14:30   ` Arnd Bergmann
2013-01-18 14:30     ` Arnd Bergmann
2013-01-18 12:24 ` [PATCH v2 08/76] asm-generic headers: Allow yet more arch overrides in checksum.h Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 09/76] ARC: Checksum/byteorder/swab routines Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 14:21   ` Arnd Bergmann
2013-01-18 14:21     ` Arnd Bergmann
2013-01-18 14:26     ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 10/76] ARC: Fundamental ARCH data-types/defines Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 14:15   ` Arnd Bergmann
2013-01-19  3:25   ` Al Viro
2013-01-19 13:11     ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 11/76] ARC: Spinlock/rwlock/mutex primitives Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 13:59   ` Arnd Bergmann
2013-01-18 12:24 ` Vineet Gupta [this message]
2013-01-18 12:24 ` [PATCH v2 13/76] ARC: Low level IRQ/Trap/Exception Handling Vineet Gupta
2013-01-19  3:31   ` Al Viro
2013-01-19  3:31     ` Al Viro
2013-01-19 13:48     ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 14/76] ARC: Interrupt Handling Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 15/76] ARC: Non-MMU Exception Handling Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 16/76] ARC: Syscall support (no-legacy-syscall ABI) Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 13:58   ` Arnd Bergmann
2013-01-19  3:09   ` Al Viro
2013-01-19 12:56     ` Vineet Gupta
2013-01-21  6:55     ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 17/76] ARC: Process-creation/scheduling/idle-loop Vineet Gupta
2013-01-18 14:35   ` Arnd Bergmann
2013-01-21 11:19     ` Vineet Gupta
2013-01-21 11:19       ` Vineet Gupta
2013-01-21 14:21       ` Arnd Bergmann
2013-01-18 12:24 ` [PATCH v2 18/76] ARC: Timers/counters/delay management Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 19/76] ARC: Signal handling Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-19  3:23   ` Al Viro
2013-01-19  3:34     ` Al Viro
2013-01-19  3:34       ` Al Viro
2013-01-19 14:10     ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 20/76] ARC: [Review] Preparing to fix incorrect syscall restarts due to signals Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 21/76] ARC: [Review] Prevent incorrect syscall restarts Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 22/76] ARC: Cache Flush Management Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 23/76] ARC: Page Table Management Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 24/76] ARC: MMU Context Management Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 25/76] ARC: MMU Exception Handling Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 26/76] ARC: TLB flush Handling Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 27/76] ARC: Page Fault handling Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 28/76] ARC: I/O and DMA Mappings Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 15:55   ` Arnd Bergmann
2013-01-18 16:01     ` Paul Mundt
2013-01-18 16:01       ` Paul Mundt
2013-01-18 16:18       ` Arnd Bergmann
2013-01-18 16:18         ` Arnd Bergmann
2013-01-21 12:38     ` Vineet Gupta
2013-01-21 14:26       ` Arnd Bergmann
2013-01-18 12:24 ` [PATCH v2 29/76] ARC: Boot #1: low-level, setup_arch(), /proc/cpuinfo, mem init Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 14:45   ` Arnd Bergmann
2013-01-22  7:49     ` Vineet Gupta
2013-01-22  7:49       ` Vineet Gupta
2013-01-22  8:23       ` Arnd Bergmann
2013-01-22  8:23         ` Arnd Bergmann
2013-01-22  8:31         ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 30/76] ARC: [plat-arcfpga] Static platform device for CONFIG_SERIAL_ARC Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 31/76] ARC: Build system: Makefiles, Kconfig, Linker script Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 19:04   ` Sam Ravnborg
2013-01-18 19:25     ` Arnd Bergmann
2013-01-19 12:23     ` Vineet Gupta
2013-01-18 19:08   ` Sam Ravnborg
2013-01-19 12:26     ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 32/76] ARC: [DeviceTree] Basic support Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 15:53   ` Rob Herring
     [not found]     ` <50F97017.4090705-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2013-01-21 10:14       ` Vineet Gupta
2013-01-21 10:14         ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 33/76] ARC: [DeviceTree] Convert some Kconfig items to runtime values Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 34/76] ARC: [plat-arcfpga]: Enabling DeviceTree for Angel4 board Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 35/76] ARC: Last bits (stubs) to get to a running kernel with UART Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 36/76] ARC: Switch to generic kernel_thread() - split ret_from_fork Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 37/76] ARC: Switch to generic kernel_execve() and sys_execve() Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 38/76] ARC: Switch to saner kernel_execve() semantics #1 Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 39/76] ARC: Switch to saner kernel_execve() semantics #2 Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 40/76] ARC: Switch to generic sys_clone, fork, vfork Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 41/76] ARC: [3.8 tracking] altstack consolidation, trace_clock, cacheflush.h Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 42/76] ARC: [plat-arcfpga] defconfig Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 43/76] ARC: [optim] Cache "current" in Register r25 Vineet Gupta
2013-01-18 12:24 ` [PATCH v2 44/76] ARC: ptrace support Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 14:48   ` Arnd Bergmann
2013-01-18 12:24 ` [PATCH v2 45/76] ARC: Futex support Vineet Gupta
2013-01-18 12:24   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 46/76] ARC: OProfile support Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 47/76] ARC: Support for high priority interrupts in the in-core intc Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 48/76] ARC: Module support Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 14:50   ` Arnd Bergmann
2013-01-19 11:56     ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 49/76] ARC: Diagnostics: show_regs() etc Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 50/76] ARC: SMP support Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 14:53   ` Arnd Bergmann
2013-01-22  8:57     ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 51/76] ARC: DWARF2 .debug_frame based stack unwinder Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 52/76] ARC: stacktracing APIs based on dw2 unwinder Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 53/76] ARC: disassembly (needed by kprobes/kgdb/unaligned-access-emul) Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 54/76] ARC: kprobes support Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 55/76] sysctl: Enable PARISC "unaligned-trap" to be used cross-arch Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 56/76] ARC: Unaligned access emulation Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 14:54   ` Arnd Bergmann
2013-01-18 14:54     ` Arnd Bergmann
2013-01-18 12:25 ` [PATCH v2 57/76] ARC: kgdb support Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 13:15   ` Jason Wessel
2013-01-18 13:31     ` Vineet Gupta
2013-01-18 13:31       ` Vineet Gupta
2013-01-18 14:25       ` Jason Wessel
2013-01-18 12:25 ` [PATCH v2 58/76] ARC: Boot #2: Verbose Boot reporting / feature verification Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 59/76] ARC: [plat-arfpga] BVCI Latency Unit setup Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 60/76] perf, ARC: Enable building perf tools for ARC Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-23 11:31   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 61/76] ARC: perf support (software counters only) Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 62/76] ARC: Support for single cycle Close Coupled Mem (CCM) Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 63/76] ARC: Hostlink Pseudo-Driver for Metaware Debugger Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 14:58   ` Arnd Bergmann
2013-01-18 14:58     ` Arnd Bergmann
2013-01-21 13:51     ` Vineet Gupta
2013-01-21 13:51       ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 64/76] ARC: Add self to MAINTAINERS Vineet Gupta
2013-01-22 13:21   ` James Hogan
2013-01-22 13:27     ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 65/76] ARC: UAPI Disintegrate arch/arc/include/asm Vineet Gupta
2013-01-23 11:34   ` Vineet Gupta
2013-01-23 12:50   ` David Howells
2013-01-23 13:03     ` Vineet Gupta
2013-01-24  5:46     ` Vineet Gupta
2013-01-24  9:54       ` James Hogan
2013-01-24 13:28       ` David Howells
2013-01-18 12:25 ` [PATCH v2 66/76] ARC: Add support for ioremap_prot API Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 12:25 ` [PATCH v2 67/76] ARC: [Review] Multi-platform image #1: Kconfig enablement Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 14:59   ` Arnd Bergmann
2013-01-18 14:59     ` Arnd Bergmann
2013-01-18 12:25 ` [PATCH v2 68/76] ARC: Fold boards sub-menu into platform/SoC menu Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 14:59   ` Arnd Bergmann
2013-01-18 14:59     ` Arnd Bergmann
2013-01-18 12:25 ` [PATCH v2 69/76] ARC: [Review] Multi-platform image #2: Board callback Infrastructure Vineet Gupta
2013-01-18 15:05   ` Arnd Bergmann
2013-01-18 15:05     ` Arnd Bergmann
2013-01-21 14:10     ` Vineet Gupta
2013-01-21 14:10       ` Vineet Gupta
2013-01-21 14:29       ` Arnd Bergmann
2013-01-18 12:25 ` [PATCH v2 70/76] ARC: [Review] Multi-platform image #3: switch to board callback Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 15:05   ` Arnd Bergmann
2013-01-18 12:25 ` [PATCH v2 71/76] ARC: [Review] Multi-platform image #4: Isolate platform headers Vineet Gupta
2013-01-18 12:25   ` Vineet Gupta
2013-01-18 15:06   ` Arnd Bergmann
2013-01-18 15:06     ` Arnd Bergmann
2013-01-18 12:40 ` [PATCH v2 72/76] ARC: [Review] Multi-platform image #5: NR_IRQS defined by ARC core Vineet Gupta
2013-01-18 12:40   ` Vineet Gupta
2013-01-18 12:40   ` [PATCH v2 73/76] ARC: [Review] Multi-platform image #6: cpu-to-dma-addr optional Vineet Gupta
2013-01-18 12:40     ` Vineet Gupta
2013-01-18 15:07     ` Arnd Bergmann
2013-01-18 12:40   ` [PATCH v2 74/76] ARC: [Review] Multi-platform image #7: SMP common code to use callbacks Vineet Gupta
2013-01-18 12:40     ` Vineet Gupta
2013-01-18 15:08     ` Arnd Bergmann
2013-01-18 12:40   ` [PATCH v2 75/76] ARC: [Review] Multi-platform image #8: platform registers SMP callbacks Vineet Gupta
2013-01-18 12:40   ` [PATCH v2 76/76] ARC: [plat-arcfpga] defconfig for fully loaded ARC Linux Vineet Gupta
2013-01-18 12:40     ` Vineet Gupta
2013-01-18 15:12 ` [PATCH v2 00/76] Synopsys ARC Linux kernel Port Arnd Bergmann
2013-01-24  8:54   ` Vineet Gupta
2013-01-24  9:52     ` James Hogan
2013-01-24 10:11       ` Vineet Gupta
2013-01-24 12:00         ` James Hogan
2013-01-20  6:15 ` H. Peter Anvin
2013-01-21  5:50   ` Vineet Gupta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1358511930-7424-13-git-send-email-vgupta@synopsys.com \
    --to=vineet.gupta1@synopsys.com \
    --cc=arnd@arndb.de \
    --cc=joern.rennecke@embecosm.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).