All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
To: linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: tglx@linutronix.de, arnd@arndb.de,
	Vineet Gupta <Vineet.Gupta1@synopsys.com>
Subject: [RFC PATCH v1 10/31] ARC: string library
Date: Wed,  7 Nov 2012 10:47:33 +0100	[thread overview]
Message-ID: <1352281674-2186-11-git-send-email-vgupta@synopsys.com> (raw)
In-Reply-To: <1352281674-2186-1-git-send-email-vgupta@synopsys.com>

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/string.h |   40 +++++++++++++
 arch/arc/lib/memcmp.S         |  124 +++++++++++++++++++++++++++++++++++++++++
 arch/arc/lib/memcpy-700.S     |   66 ++++++++++++++++++++++
 arch/arc/lib/memset.S         |   59 +++++++++++++++++++
 arch/arc/lib/strchr-700.S     |  123 ++++++++++++++++++++++++++++++++++++++++
 arch/arc/lib/strcmp.S         |   96 +++++++++++++++++++++++++++++++
 arch/arc/lib/strcpy-700.S     |   70 +++++++++++++++++++++++
 arch/arc/lib/strlen.S         |   83 +++++++++++++++++++++++++++
 8 files changed, 661 insertions(+), 0 deletions(-)
 create mode 100644 arch/arc/include/asm/string.h
 create mode 100644 arch/arc/lib/memcmp.S
 create mode 100644 arch/arc/lib/memcpy-700.S
 create mode 100644 arch/arc/lib/memset.S
 create mode 100644 arch/arc/lib/strchr-700.S
 create mode 100644 arch/arc/lib/strcmp.S
 create mode 100644 arch/arc/lib/strcpy-700.S
 create mode 100644 arch/arc/lib/strlen.S

diff --git a/arch/arc/include/asm/string.h b/arch/arc/include/asm/string.h
new file mode 100644
index 0000000..87676c8
--- /dev/null
+++ b/arch/arc/include/asm/string.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * vineetg: May 2011
+ *  -We had half-optimised memset/memcpy, got better versions of those
+ *  -Added memcmp, strchr, strcpy, strcmp, strlen
+ *
+ * Amit Bhor: Codito Technologies 2004
+ */
+
+#ifndef _ASM_ARC_STRING_H
+#define _ASM_ARC_STRING_H
+
+#include <linux/types.h>
+
+#ifdef __KERNEL__
+
+#define __HAVE_ARCH_MEMSET
+#define __HAVE_ARCH_MEMCPY
+#define __HAVE_ARCH_MEMCMP
+#define __HAVE_ARCH_STRCHR
+#define __HAVE_ARCH_STRCPY
+#define __HAVE_ARCH_STRCMP
+#define __HAVE_ARCH_STRLEN
+
+extern void *memset(void *ptr, int, __kernel_size_t);
+extern void *memcpy(void *, const void *, __kernel_size_t);
+extern void memzero(void *ptr, __kernel_size_t n);
+extern int memcmp(const void *, const void *, __kernel_size_t);
+extern char *strchr(const char *s, int c);
+extern char *strcpy(char *dest, const char *src);
+extern int strcmp(const char *cs, const char *ct);
+extern __kernel_size_t strlen(const char *);
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_ARC_STRING_H */
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
new file mode 100644
index 0000000..bc813d5
--- /dev/null
+++ b/arch/arc/lib/memcmp.S
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+#ifdef __LITTLE_ENDIAN__
+#define WORD2 r2
+#define SHIFT r3
+#else /* BIG ENDIAN */
+#define WORD2 r3
+#define SHIFT r2
+#endif
+
+ARC_ENTRY memcmp
+	or	r12,r0,r1
+	asl_s	r12,r12,30
+	sub	r3,r2,1
+	brls	r2,r12,.Lbytewise
+	ld	r4,[r0,0]
+	ld	r5,[r1,0]
+	lsr.f	lp_count,r3,3
+	lpne	.Loop_end
+	ld_s	WORD2,[r0,4]
+	ld_s	r12,[r1,4]
+	brne	r4,r5,.Leven
+	ld.a	r4,[r0,8]
+	ld.a	r5,[r1,8]
+	brne	WORD2,r12,.Lodd
+.Loop_end:
+	asl_s	SHIFT,SHIFT,3
+	bhs_s	.Last_cmp
+	brne	r4,r5,.Leven
+	ld	r4,[r0,4]
+	ld	r5,[r1,4]
+#ifdef __LITTLE_ENDIAN__
+	nop_s
+	; one more load latency cycle
+.Last_cmp:
+	xor	r0,r4,r5
+	bset	r0,r0,SHIFT
+	sub_s	r1,r0,1
+	bic_s	r1,r1,r0
+	norm	r1,r1
+	b.d	.Leven_cmp
+	and	r1,r1,24
+.Leven:
+	xor	r0,r4,r5
+	sub_s	r1,r0,1
+	bic_s	r1,r1,r0
+	norm	r1,r1
+	; slow track insn
+	and	r1,r1,24
+.Leven_cmp:
+	asl	r2,r4,r1
+	asl	r12,r5,r1
+	lsr_s	r2,r2,1
+	lsr_s	r12,r12,1
+	j_s.d	[blink]
+	sub	r0,r2,r12
+	.balign	4
+.Lodd:
+	xor	r0,WORD2,r12
+	sub_s	r1,r0,1
+	bic_s	r1,r1,r0
+	norm	r1,r1
+	; slow track insn
+	and	r1,r1,24
+	asl_s	r2,r2,r1
+	asl_s	r12,r12,r1
+	lsr_s	r2,r2,1
+	lsr_s	r12,r12,1
+	j_s.d	[blink]
+	sub	r0,r2,r12
+#else /* BIG ENDIAN */
+.Last_cmp:
+	neg_s	SHIFT,SHIFT
+	lsr	r4,r4,SHIFT
+	lsr	r5,r5,SHIFT
+	; slow track insn
+.Leven:
+	sub.f	r0,r4,r5
+	mov.ne	r0,1
+	j_s.d	[blink]
+	bset.cs	r0,r0,31
+.Lodd:
+	cmp_s	WORD2,r12
+
+	mov_s	r0,1
+	j_s.d	[blink]
+	bset.cs	r0,r0,31
+#endif /* ENDIAN */
+	.balign	4
+.Lbytewise:
+	breq	r2,0,.Lnil
+	ldb	r4,[r0,0]
+	ldb	r5,[r1,0]
+	lsr.f	lp_count,r3
+	lpne	.Lbyte_end
+	ldb_s	r3,[r0,1]
+	ldb	r12,[r1,1]
+	brne	r4,r5,.Lbyte_even
+	ldb.a	r4,[r0,2]
+	ldb.a	r5,[r1,2]
+	brne	r3,r12,.Lbyte_odd
+.Lbyte_end:
+	bcc	.Lbyte_even
+	brne	r4,r5,.Lbyte_even
+	ldb_s	r3,[r0,1]
+	ldb_s	r12,[r1,1]
+.Lbyte_odd:
+	j_s.d	[blink]
+	sub	r0,r3,r12
+.Lbyte_even:
+	j_s.d	[blink]
+	sub	r0,r4,r5
+.Lnil:
+	j_s.d	[blink]
+	mov	r0,0
+ARC_EXIT memcmp
diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S
new file mode 100644
index 0000000..b64cc10
--- /dev/null
+++ b/arch/arc/lib/memcpy-700.S
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY memcpy
+	or	r3,r0,r1
+	asl_s	r3,r3,30
+	mov_s	r5,r0
+	brls.d	r2,r3,.Lcopy_bytewise
+	sub.f	r3,r2,1
+	ld_s	r12,[r1,0]
+	asr.f	lp_count,r3,3
+	bbit0.d	r3,2,.Lnox4
+	bmsk_s	r2,r2,1
+	st.ab	r12,[r5,4]
+	ld.a	r12,[r1,4]
+.Lnox4:
+	lppnz	.Lendloop
+	ld_s	r3,[r1,4]
+	st.ab	r12,[r5,4]
+	ld.a	r12,[r1,8]
+	st.ab	r3,[r5,4]
+.Lendloop:
+	breq	r2,0,.Last_store
+	ld	r3,[r5,0]
+#ifdef __LITTLE_ENDIAN__
+	add3	r2,-1,r2
+	; uses long immediate
+	xor_s	r12,r12,r3
+	bmsk	r12,r12,r2
+    xor_s	r12,r12,r3
+#else /* BIG ENDIAN */
+	sub3	r2,31,r2
+	; uses long immediate
+        xor_s	r3,r3,r12
+        bmsk	r3,r3,r2
+        xor_s	r12,r12,r3
+#endif /* ENDIAN */
+.Last_store:
+	j_s.d	[blink]
+	st	r12,[r5,0]
+
+	.balign	4
+.Lcopy_bytewise:
+	jcs	[blink]
+	ldb_s	r12,[r1,0]
+	lsr.f	lp_count,r3
+	bhs_s	.Lnox1
+	stb.ab	r12,[r5,1]
+	ldb.a	r12,[r1,1]
+.Lnox1:
+	lppnz	.Lendbloop
+	ldb_s	r3,[r1,1]
+	stb.ab	r12,[r5,1]
+	ldb.a	r12,[r1,2]
+	stb.ab	r3,[r5,1]
+.Lendbloop:
+	j_s.d	[blink]
+	stb	r12,[r5,0]
+ARC_EXIT memcpy
diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S
new file mode 100644
index 0000000..9b2d88d
--- /dev/null
+++ b/arch/arc/lib/memset.S
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+#define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues.  */
+
+ARC_ENTRY memset
+	mov_s	r4,r0
+	or	r12,r0,r2
+	bmsk.f	r12,r12,1
+	extb_s	r1,r1
+	asl	r3,r1,8
+	beq.d	.Laligned
+	or_s	r1,r1,r3
+	brls	r2,SMALL,.Ltiny
+	add	r3,r2,r0
+	stb	r1,[r3,-1]
+	bclr_s	r3,r3,0
+	stw	r1,[r3,-2]
+	bmsk.f	r12,r0,1
+	add_s	r2,r2,r12
+	sub.ne	r2,r2,4
+	stb.ab	r1,[r4,1]
+	and	r4,r4,-2
+	stw.ab	r1,[r4,2]
+	and	r4,r4,-4
+.Laligned:	; This code address should be aligned for speed.
+	asl	r3,r1,16
+	lsr.f	lp_count,r2,2
+	or_s	r1,r1,r3
+	lpne	.Loop_end
+	st.ab	r1,[r4,4]
+.Loop_end:
+	j_s	[blink]
+
+	.balign	4
+.Ltiny:
+	mov.f	lp_count,r2
+	lpne	.Ltiny_end
+	stb.ab	r1,[r4,1]
+.Ltiny_end:
+	j_s	[blink]
+ARC_EXIT memset
+
+; memzero: @r0 = mem, @r1 = size_t
+; memset:  @r0 = mem, @r1 = char, @r2 = size_t
+
+ARC_ENTRY memzero
+    ; adjust bzero args to memset args
+    mov r2, r1
+    mov r1, 0
+    b  memset    ;tail call so need to tinker with blink
+ARC_EXIT memzero
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S
new file mode 100644
index 0000000..99c1047
--- /dev/null
+++ b/arch/arc/lib/strchr-700.S
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* ARC700 has a relatively long pipeline and branch prediction, so we want
+   to avoid branches that are hard to predict.  On the other hand, the
+   presence of the norm instruction makes it easier to operate on whole
+   words branch-free.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strchr
+	extb_s	r1,r1
+	asl	r5,r1,8
+	bmsk	r2,r0,1
+	or	r5,r5,r1
+	mov_s	r3,0x01010101
+	breq.d	r2,r0,.Laligned
+	asl	r4,r5,16
+	sub_s	r0,r0,r2
+	asl	r7,r2,3
+	ld_s	r2,[r0]
+#ifdef __LITTLE_ENDIAN__
+	asl	r7,r3,r7
+#else
+	lsr	r7,r3,r7
+#endif
+	or	r5,r5,r4
+	ror	r4,r3
+	sub	r12,r2,r7
+	bic_s	r12,r12,r2
+	and	r12,r12,r4
+	brne.d	r12,0,.Lfound0_ua
+	xor	r6,r2,r5
+	ld.a	r2,[r0,4]
+	sub	r12,r6,r7
+	bic	r12,r12,r6
+	and	r7,r12,r4
+	breq	r7,0,.Loop ; For speed, we want this branch to be unaligned.
+	b	.Lfound_char ; Likewise this one.
+; /* We require this code address to be unaligned for speed...  */
+.Laligned:
+	ld_s	r2,[r0]
+	or	r5,r5,r4
+	ror	r4,r3
+; /* ... so that this code address is aligned, for itself and ...  */
+.Loop:
+	sub	r12,r2,r3
+	bic_s	r12,r12,r2
+	and	r12,r12,r4
+	brne.d	r12,0,.Lfound0
+	xor	r6,r2,r5
+	ld.a	r2,[r0,4]
+	sub	r12,r6,r3
+	bic	r12,r12,r6
+	and	r7,r12,r4
+	breq	r7,0,.Loop /* ... so that this branch is unaligned.  */
+	; Found searched-for character.  r0 has already advanced to next word.
+#ifdef __LITTLE_ENDIAN__
+/* We only need the information about the first matching byte
+   (i.e. the least significant matching byte) to be exact,
+   hence there is no problem with carry effects.  */
+.Lfound_char:
+	sub	r3,r7,1
+	bic	r3,r3,r7
+	norm	r2,r3
+	sub_s	r0,r0,1
+	asr_s	r2,r2,3
+	j.d	[blink]
+	sub_s	r0,r0,r2
+
+	.balign	4
+.Lfound0_ua:
+	mov	r3,r7
+.Lfound0:
+	sub	r3,r6,r3
+	bic	r3,r3,r6
+	and	r2,r3,r4
+	or_s	r12,r12,r2
+	sub_s	r3,r12,1
+	bic_s	r3,r3,r12
+	norm	r3,r3
+	add_s	r0,r0,3
+	asr_s	r12,r3,3
+	asl.f	0,r2,r3
+	sub_s	r0,r0,r12
+	j_s.d	[blink]
+	mov.pl	r0,0
+#else /* BIG ENDIAN */
+.Lfound_char:
+	lsr	r7,r7,7
+
+	bic	r2,r7,r6
+	norm	r2,r2
+	sub_s	r0,r0,4
+	asr_s	r2,r2,3
+	j.d	[blink]
+	add_s	r0,r0,r2
+
+.Lfound0_ua:
+	mov_s	r3,r7
+.Lfound0:
+	asl_s	r2,r2,7
+	or	r7,r6,r4
+	bic_s	r12,r12,r2
+	sub	r2,r7,r3
+	or	r2,r2,r6
+	bic	r12,r2,r12
+	bic.f	r3,r4,r12
+	norm	r3,r3
+
+	add.pl	r3,r3,1
+	asr_s	r12,r3,3
+	asl.f	0,r2,r3
+	add_s	r0,r0,r12
+	j_s.d	[blink]
+	mov.mi	r0,0
+#endif /* ENDIAN */
+ARC_EXIT strchr
diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S
new file mode 100644
index 0000000..5dc802b
--- /dev/null
+++ b/arch/arc/lib/strcmp.S
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* This is optimized primarily for the ARC700.
+   It would be possible to speed up the loops by one cycle / word
+   respective one cycle / byte by forcing double source 1 alignment, unrolling
+   by a factor of two, and speculatively loading the second word / byte of
+   source 1; however, that would increase the overhead for loop setup / finish,
+   and strcmp might often terminate early.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strcmp
+	or	r2,r0,r1
+	bmsk_s	r2,r2,1
+	brne	r2,0,.Lcharloop
+	mov_s	r12,0x01010101
+	ror	r5,r12
+.Lwordloop:
+	ld.ab	r2,[r0,4]
+	ld.ab	r3,[r1,4]
+	nop_s
+	sub	r4,r2,r12
+	bic	r4,r4,r2
+	and	r4,r4,r5
+	brne	r4,0,.Lfound0
+	breq	r2,r3,.Lwordloop
+#ifdef	__LITTLE_ENDIAN__
+	xor	r0,r2,r3	; mask for difference
+	sub_s	r1,r0,1
+	bic_s	r0,r0,r1	; mask for least significant difference bit
+	sub	r1,r5,r0
+	xor	r0,r5,r1	; mask for least significant difference byte
+	and_s	r2,r2,r0
+	and_s	r3,r3,r0
+#endif /* LITTLE ENDIAN */
+	cmp_s	r2,r3
+	mov_s	r0,1
+	j_s.d	[blink]
+	bset.lo	r0,r0,31
+
+	.balign	4
+#ifdef __LITTLE_ENDIAN__
+.Lfound0:
+	xor	r0,r2,r3	; mask for difference
+	or	r0,r0,r4	; or in zero indicator
+	sub_s	r1,r0,1
+	bic_s	r0,r0,r1	; mask for least significant difference bit
+	sub	r1,r5,r0
+	xor	r0,r5,r1	; mask for least significant difference byte
+	and_s	r2,r2,r0
+	and_s	r3,r3,r0
+	sub.f	r0,r2,r3
+	mov.hi	r0,1
+	j_s.d	[blink]
+	bset.lo	r0,r0,31
+#else /* BIG ENDIAN */
+	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
+	   because of carry-propagateion from a lower significant zero byte.
+	   We can compensate for this by checking that bit0 is zero.
+	   This compensation is not necessary in the step where we
+	   get a low estimate for r2, because in any affected bytes
+	   we already have 0x00 or 0x01, which will remain unchanged
+	   when bit 7 is cleared.  */
+	.balign	4
+.Lfound0:
+	lsr	r0,r4,8
+	lsr_s	r1,r2
+	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
+	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
+	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
+	cmp_s	r3,r2		; ... be independent of trailing garbage
+	or_s	r2,r2,r0	; likewise for r3 > r2
+	bic_s	r3,r3,r0
+	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
+	cmp_s	r2,r3
+	j_s.d	[blink]
+	bset.lo	r0,r0,31
+#endif /* ENDIAN */
+
+	.balign	4
+.Lcharloop:
+	ldb.ab	r2,[r0,1]
+	ldb.ab	r3,[r1,1]
+	nop_s
+	breq	r2,0,.Lcmpend
+	breq	r2,r3,.Lcharloop
+.Lcmpend:
+	j_s.d	[blink]
+	sub	r0,r2,r3
+ARC_EXIT strcmp
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
new file mode 100644
index 0000000..b7ca4ae
--- /dev/null
+++ b/arch/arc/lib/strcpy-700.S
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
+   If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
+   it 8 byte aligned.  Thus, we can do a little read-ahead, without
+   dereferencing a cache line that we should not touch.
+   Note that short and long instructions have been scheduled to avoid
+   branch stalls.
+   The beq_s to r3z could be made unaligned & long to avoid a stall
+   there, but the it is not likely to be taken often, and it
+   would also be likey to cost an unaligned mispredict at the next call.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strcpy
+	or	r2,r0,r1
+	bmsk_s	r2,r2,1
+	brne.d	r2,0,charloop
+	mov_s	r10,r0
+	ld_s	r3,[r1,0]
+	mov	r8,0x01010101
+	bbit0.d	r1,2,loop_start
+	ror	r12,r8
+	sub	r2,r3,r8
+	bic_s	r2,r2,r3
+	tst_s	r2,r12
+	bne	r3z
+	mov_s	r4,r3
+	.balign 4
+loop:
+	ld.a	r3,[r1,4]
+	st.ab	r4,[r10,4]
+loop_start:
+	ld.a	r4,[r1,4]
+	sub	r2,r3,r8
+	bic_s	r2,r2,r3
+	tst_s	r2,r12
+	bne_s	r3z
+	st.ab	r3,[r10,4]
+	sub	r2,r4,r8
+	bic	r2,r2,r4
+	tst	r2,r12
+	beq	loop
+	mov_s	r3,r4
+#ifdef __LITTLE_ENDIAN__
+r3z:	bmsk.f	r1,r3,7
+	lsr_s	r3,r3,8
+#else
+r3z:	lsr.f	r1,r3,24
+	asl_s	r3,r3,8
+#endif
+	bne.d	r3z
+	stb.ab	r1,[r10,1]
+	j_s	[blink]
+
+	.balign	4
+charloop:
+	ldb.ab	r3,[r1,1]
+
+
+	brne.d	r3,0,charloop
+	stb.ab	r3,[r10,1]
+	j	[blink]
+ARC_EXIT strcpy
diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S
new file mode 100644
index 0000000..39759e0
--- /dev/null
+++ b/arch/arc/lib/strlen.S
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strlen
+	or	r3,r0,7
+	ld	r2,[r3,-7]
+	ld.a	r6,[r3,-3]
+	mov	r4,0x01010101
+	; uses long immediate
+#ifdef __LITTLE_ENDIAN__
+	asl_s	r1,r0,3
+	btst_s	r0,2
+	asl	r7,r4,r1
+	ror	r5,r4
+	sub	r1,r2,r7
+	bic_s	r1,r1,r2
+	mov.eq	r7,r4
+	sub	r12,r6,r7
+	bic	r12,r12,r6
+	or.eq	r12,r12,r1
+	and	r12,r12,r5
+	brne	r12,0,.Learly_end
+#else /* BIG ENDIAN */
+	ror	r5,r4
+	btst_s	r0,2
+	mov_s	r1,31
+	sub3	r7,r1,r0
+	sub	r1,r2,r4
+	bic_s	r1,r1,r2
+	bmsk	r1,r1,r7
+	sub	r12,r6,r4
+	bic	r12,r12,r6
+	bmsk.ne	r12,r12,r7
+	or.eq	r12,r12,r1
+	and	r12,r12,r5
+	brne	r12,0,.Learly_end
+#endif /* ENDIAN */
+
+.Loop:
+	ld_s	r2,[r3,4]
+	ld.a	r6,[r3,8]
+	; stall for load result
+	sub	r1,r2,r4
+	bic_s	r1,r1,r2
+	sub	r12,r6,r4
+	bic	r12,r12,r6
+	or	r12,r12,r1
+	and	r12,r12,r5
+	breq r12,0,.Loop
+.Lend:
+	and.f	r1,r1,r5
+	sub.ne	r3,r3,4
+	mov.eq	r1,r12
+#ifdef __LITTLE_ENDIAN__
+	sub_s	r2,r1,1
+	bic_s	r2,r2,r1
+	norm	r1,r2
+	sub_s	r0,r0,3
+	lsr_s	r1,r1,3
+	sub	    r0,r3,r0
+	j_s.d	[blink]
+	sub	    r0,r0,r1
+#else /* BIG ENDIAN */
+	lsr_s	r1,r1,7
+	mov.eq	r2,r6
+	bic_s	r1,r1,r2
+	norm	r1,r1
+	sub	    r0,r3,r0
+	lsr_s	r1,r1,3
+	j_s.d	[blink]
+	add	    r0,r0,r1
+#endif /* ENDIAN */
+.Learly_end:
+	b.d	.Lend
+	sub_s.ne r1,r1,r1
+ARC_EXIT strlen
-- 
1.7.4.1

  parent reply	other threads:[~2012-11-07  9:47 UTC|newest]

Thread overview: 141+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-07  9:47 [RFC Patch v1 00/31] Synopsys ARC Linux kernel Port Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 01/31] ARC: Generic Headers Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 02/31] ARC: irqflags Vineet Gupta
2012-11-12 19:50   ` Thomas Gleixner
2013-01-01  7:44     ` Vineet Gupta
2013-01-01  7:44       ` Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 03/31] ARC: atomic/bitops/cmpxchg/barriers Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 04/31] asm-generic headers: uaccess.h to conditionally define segment_eq() Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 05/31] ARC: uaccess friends Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 06/31] asm-generic headers: Allow yet more arch overrides in checksum.h Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 07/31] ARC: checksum/byteorder/swab routines Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 08/31] ARC: Fundamental ARCH data-types/defines Vineet Gupta
2012-11-08  7:10   ` Jonas Bonn
2012-11-08 18:52     ` Vineet Gupta
2012-11-08 20:36       ` Jonas Bonn
2012-11-12 13:58         ` Vineet Gupta
2012-11-12 14:12           ` Arnd Bergmann
2012-11-07  9:47 ` [RFC PATCH v1 09/31] ARC: spinlock/rwlock/mutex primitives Vineet Gupta
2012-11-07  9:47 ` Vineet Gupta [this message]
2012-11-07  9:47 ` [RFC PATCH v1 11/31] ARC: Low level IRQ/Trap/Exception(non-MMU) Handling Vineet Gupta
2012-11-16  4:58   ` Al Viro
2012-12-27  9:00     ` Vineet Gupta
2012-12-27  9:00       ` Vineet Gupta
2012-12-27 13:29       ` Vineet Gupta
2012-12-27 13:29         ` Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 12/31] ARC: Interrupt Handling Vineet Gupta
2012-11-12 20:08   ` Thomas Gleixner
2013-01-01 10:46     ` Vineet Gupta
2013-01-01 10:46       ` Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 13/31] ARC: Non-MMU Exception Handling Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 14/31] ARC: syscall support Vineet Gupta
2012-11-07 14:21   ` Arnd Bergmann
2012-11-09  9:50     ` James Hogan
2012-11-09  9:50       ` James Hogan
2012-11-13 11:41       ` James Hogan
2012-11-13 11:41         ` James Hogan
2012-11-13 12:01         ` Jonas Bonn
2012-11-13 12:11           ` James Hogan
2012-11-13 12:11             ` James Hogan
2012-11-14 12:23             ` Arnd Bergmann
2012-11-14 12:31               ` James Hogan
2012-11-14 12:31                 ` James Hogan
2012-11-13 10:13     ` Gilad Ben-Yossef
2012-11-13 10:37       ` Arnd Bergmann
2012-11-15  6:15         ` Vineet Gupta
2012-11-15  6:15           ` Vineet Gupta
2012-11-15 12:35           ` Arnd Bergmann
2013-01-17  5:13             ` Vineet Gupta
2013-01-17  5:13               ` Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 15/31] ARC: Process/scheduling/clock/Timers/Delay Management Vineet Gupta
2012-11-12 20:29   ` Thomas Gleixner
2013-01-02  7:13     ` Vineet Gupta
2013-01-02  7:13       ` Vineet Gupta
2013-01-02  8:45       ` Vineet Gupta
2013-01-02  8:45         ` Vineet Gupta
2013-01-04 13:01       ` Frederic Weisbecker
2012-11-07  9:47 ` [RFC PATCH v1 16/31] ARC: Signal handling Vineet Gupta
2012-11-16  5:26   ` Al Viro
2012-12-28 12:34     ` Vineet Gupta
2012-12-28 12:34       ` Vineet Gupta
2012-12-28 12:42       ` [PATCH 1/2] ARC: [Review] Preparing to fix incorrect syscall restarts due to signals Vineet Gupta
2012-12-28 12:42         ` Vineet Gupta
2012-12-28 12:42         ` [PATCH 2/2] ARC: [Review] Prevent incorrect syscall restarts Vineet Gupta
2012-12-28 12:42           ` Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 17/31] ARC: Cache Flush Management Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 18/31] ARC: Page Table Management Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 19/31] ARC: MMU Context Management Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 20/31] ARC: MMU Exception Handling Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 21/31] ARC: TLB flush Handling Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 22/31] ARC: Page Fault handling (incl uaccess fixup) Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 23/31] ARC: I/O and DMA Mappings Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 24/31] ARC: startup #1: low-level, setup_arch(), /proc/cpuinfo, mem init Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 25/31] ARC: [plat-arcfpga] Hooking up platform to ARC UART Vineet Gupta
2012-11-07 14:16   ` Arnd Bergmann
2013-01-07 13:10     ` Vineet Gupta
2013-01-07 13:10       ` Vineet Gupta
2013-01-07 13:46       ` Arnd Bergmann
2013-01-07 14:04         ` Vineet Gupta
2013-01-07 14:04           ` Vineet Gupta
2013-01-07 14:36           ` Arnd Bergmann
2013-01-14  7:35     ` early init dt for earlyprintk (was Re: [RFC PATCH v1 25/31] ARC: [plat-arcfpga] Hooking up platform to ARC UART) Vineet Gupta
2013-01-14  7:35       ` Vineet Gupta
2013-01-14  9:48       ` James Hogan
2013-01-14  9:48         ` James Hogan
2013-01-14 10:09         ` Vineet Gupta
2013-01-14 10:09           ` Vineet Gupta
2013-01-14 10:54       ` Arnd Bergmann
2013-01-17  7:29     ` [RFC PATCH v1 25/31] ARC: [plat-arcfpga] Hooking up platform to ARC UART Vineet Gupta
2013-01-17  7:29       ` Vineet Gupta
2013-01-17 10:52       ` Arnd Bergmann
2012-11-07  9:47 ` [RFC PATCH v1 26/31] ARC: Build system: Makefiles, Kconfig, Linker script Vineet Gupta
2012-11-07 14:13   ` Arnd Bergmann
2013-01-02 14:30     ` Vineet Gupta
2013-01-02 14:48       ` Arnd Bergmann
2013-01-03  7:58         ` Vineet Gupta
2013-01-03  7:58           ` Vineet Gupta
2013-01-03  8:25           ` Arnd Bergmann
2013-03-11 12:29     ` SYSV IPC broken for no-legacy syscall kernels (was Re: [RFC PATCH v1 26/31] ARC: Build system: Makefiles, Kconfig, Linker script) Vineet Gupta
2013-03-11 12:29       ` Vineet Gupta
2013-03-11 12:44       ` James Hogan
2013-03-11 12:44         ` James Hogan
2013-03-11 12:56         ` Vineet Gupta
2013-03-11 12:56           ` Vineet Gupta
2013-03-11 13:07           ` James Hogan
2013-03-11 13:07             ` James Hogan
2013-03-11 13:30             ` Arnd Bergmann
2013-03-11 13:48               ` Vineet Gupta
2013-03-11 13:48                 ` Vineet Gupta
2013-03-11 14:50                 ` Arnd Bergmann
2012-11-15 17:49   ` [RFC PATCH v1 26/31] ARC: Build system: Makefiles, Kconfig, Linker script James Hogan
2012-11-15 17:49     ` James Hogan
2012-11-15 19:30     ` Ralf Baechle
2012-11-16  6:36       ` Vineet Gupta
2012-11-16  6:36         ` Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 27/31] ARC: Last bits (stubs) to get to a running kernel with UART Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 28/31] ARC: split ret_from_fork, simplify kernel_thread() Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 29/31] ARC: switch to generic kernel_thread() Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 30/31] ARC: switch to generic kernel_execve() and sys_execve() Vineet Gupta
2012-11-16  4:08   ` Al Viro
2012-11-17 14:01     ` Vineet Gupta
2012-11-17 14:01       ` Vineet Gupta
2012-11-07  9:47 ` [RFC PATCH v1 31/31] ARC: [plat-arcfpga] defconfig Vineet Gupta
2012-11-07 14:06   ` Arnd Bergmann
2012-11-12 14:18     ` James Hogan
2012-11-12 14:18       ` James Hogan
2012-11-12 14:21       ` Arnd Bergmann
2012-11-07 14:36 ` [RFC Patch v1 00/31] Synopsys ARC Linux kernel Port Arnd Bergmann
2012-11-08 19:09   ` Vineet Gupta
2012-11-07 20:46 ` Gilad Ben-Yossef
2012-11-20 13:47 ` Pavel Machek
2012-11-20 13:49   ` Vineet Gupta
2012-11-20 13:49     ` Vineet Gupta
2012-11-20 13:59   ` Pavel Machek
2012-11-20 14:17     ` Vineet Gupta
2012-11-20 14:17       ` Vineet Gupta
2013-01-18 19:46       ` Pavel Machek
2013-01-18 22:17         ` Arnd Bergmann
2013-01-19 10:15           ` Pavel Machek
2013-01-19 12:32         ` Vineet Gupta
2013-01-19 12:32           ` Vineet Gupta
2013-01-19 17:02           ` Pavel Machek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1352281674-2186-11-git-send-email-vgupta@synopsys.com \
    --to=vineet.gupta1@synopsys.com \
    --cc=arnd@arndb.de \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.