From: Grant Grundler <iod00d@hp.com>
To: linux-ia64@vger.kernel.org
Subject: PATCH 2.4.23-pre6 gcc-3.3 support (xor.S)
Date: Thu, 16 Oct 2003 19:16:36 +0000 [thread overview]
Message-ID: <marc-linux-ia64-106633200522585@msgid-missing> (raw)
Hi Bjorn,
gcc-3.3 doesn't like the asm("") in xor.h being spread out over 250 lines.
I gather gcc-3.3 no longer accepts unescaped line breaks inside a string literal.
Besides, it just feels wrong to have a 250 line asm().
Fortunately 2.6 already fixes this problem. Here's a "back port"
of xor.S from davidm's linux-ia64-2.5 bk tree.
BTW, since the function prototypes in xor.h are identical,
I've assumed the 2.6 xor.S is functionally identical/compatible too.
grant
diff -Nur --exclude=RCS --exclude=CVS --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-ia64-2.4-orig/arch/ia64/lib/Makefile linux-ia64-2.4/arch/ia64/lib/Makefile
--- linux-ia64-2.4-orig/arch/ia64/lib/Makefile Thu Oct 16 11:57:49 2003
+++ linux-ia64-2.4/arch/ia64/lib/Makefile Thu Oct 16 09:20:36 2003
@@ -18,6 +18,7 @@
obj-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o
obj-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
+obj-$(CONFIG_MD_RAID5) += xor.o
IGNORE_FLAGS_OBJS = __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
__divdi3.o __udivdi3.o __moddi3.o __umoddi3.o
diff -Nur --exclude=RCS --exclude=CVS --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-ia64-2.4-orig/arch/ia64/lib/xor.S linux-ia64-2.4/arch/ia64/lib/xor.S
--- linux-ia64-2.4-orig/arch/ia64/lib/xor.S Wed Dec 31 16:00:00 1969
+++ linux-ia64-2.4/arch/ia64/lib/xor.S Thu Oct 16 09:19:20 2003
@@ -0,0 +1,184 @@
+/*
+ * arch/ia64/lib/xor.S
+ *
+ * Optimized RAID-5 checksumming functions for IA-64.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/asmmacro.h>
+// xor_ia64_2(in0, in1, in2): for each of (in0 >> 3) 8-byte words, store in1[i] ^ in2[i] back into in1[i].
+GLOBAL_ENTRY(xor_ia64_2)
+ .prologue
+ .fframe 0
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 3, 0, 13, 16   // 3 inputs, 0 locals, 13 outputs; all 16 frame registers rotate
+ .save ar.lc, r30
+ mov r30 = ar.lc   // keep caller's loop count; restored before return
+ .save pr, r29
+ mov r29 = pr   // keep caller's predicates; restored before return
+ ;;
+ .body
+ mov r8 = in1   // r8 = store pointer: results overwrite the first buffer
+ mov ar.ec = 6 + 2   // epilogue count = 8 pipeline stages (p[0]..p[7])
+ shr in0 = in0, 3   // in0 / 8 = number of 8-byte words to process
+ ;;
+ adds in0 = -1, in0   // ar.lc wants iterations - 1; NOTE(review): in0 < 8 yields -1 here, presumably never passed
+ mov r16 = in1   // r16 = load pointer for source 1 (static reg; in* live in the rotating frame)
+ mov r17 = in2   // r17 = load pointer for source 2
+ ;;
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16   // p16 (= p[0]) set, remaining rotating predicates cleared: only stage 0 active at start
+ ;;
+ .rotr s1[6+1], s2[6+1], d[2]   // rotating names: loaded words (7 deep each) and the result (2 deep)
+ .rotp p[6+2]   // rotating stage predicates p[0]..p[7]
+0:
+(p[0]) ld8.nta s1[0] = [r16], 8   // stage 0: load next word of source 1, advance pointer by 8
+(p[0]) ld8.nta s2[0] = [r17], 8   // stage 0: load next word of source 2, advance pointer by 8
+(p[6]) xor d[0] = s1[6], s2[6]   // stage 6: combine the words loaded 6 rotations ago
+(p[6+1])st8.nta [r8] = d[1], 8   // stage 7: store the result computed on the previous rotation
+ nop.f 0   // F-unit no-op filler
+ br.ctop.dptk.few 0b   // counted loop branch: rotates registers/predicates, drains stages via ar.ec once ar.lc is 0
+ ;;
+ mov ar.lc = r30   // restore caller's loop count
+ mov pr = r29, -1   // restore all predicates (mask -1)
+ br.ret.sptk.few rp
+END(xor_ia64_2)
+// xor_ia64_3(in0, in1, in2, in3): for each of (in0 >> 3) 8-byte words, store in1[i] ^ in2[i] ^ in3[i] into in1[i].
+GLOBAL_ENTRY(xor_ia64_3)
+ .prologue
+ .fframe 0
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 4, 0, 20, 24   // 4 inputs, 0 locals, 20 outputs; all 24 frame registers rotate
+ .save ar.lc, r30
+ mov r30 = ar.lc   // keep caller's loop count; restored before return
+ .save pr, r29
+ mov r29 = pr   // keep caller's predicates; restored before return
+ ;;
+ .body
+ mov r8 = in1   // r8 = store pointer: results overwrite the first buffer
+ mov ar.ec = 6 + 2   // epilogue count = 8 pipeline stages (p[0]..p[7])
+ shr in0 = in0, 3   // in0 / 8 = number of 8-byte words to process
+ ;;
+ adds in0 = -1, in0   // ar.lc wants iterations - 1; NOTE(review): in0 < 8 yields -1 here, presumably never passed
+ mov r16 = in1   // r16 = load pointer for source 1 (static reg; in* live in the rotating frame)
+ mov r17 = in2   // r17 = load pointer for source 2
+ ;;
+ mov r18 = in3   // r18 = load pointer for source 3
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16   // p16 (= p[0]) set, remaining rotating predicates cleared: only stage 0 active at start
+ ;;
+ .rotr s1[6+1], s2[6+1], s3[6+1], d[2]   // rotating names: loaded words (7 deep each) and the result (2 deep)
+ .rotp p[6+2]   // rotating stage predicates p[0]..p[7]
+0:
+(p[0]) ld8.nta s1[0] = [r16], 8   // stage 0: load next word of source 1, advance pointer by 8
+(p[0]) ld8.nta s2[0] = [r17], 8   // stage 0: load next word of source 2, advance pointer by 8
+(p[6]) xor d[0] = s1[6], s2[6]   // stage 6, first half: s1 ^ s2 for the words loaded 6 rotations ago
+ ;;
+(p[0]) ld8.nta s3[0] = [r18], 8   // stage 0: load next word of source 3, advance pointer by 8
+(p[6+1])st8.nta [r8] = d[1], 8   // stage 7: store the result completed on the previous rotation
+(p[6]) xor d[0] = d[0], s3[6]   // stage 6, second half: fold in source 3 (needs d[0] from the group above)
+ br.ctop.dptk.few 0b   // counted loop branch: rotates registers/predicates, drains stages via ar.ec once ar.lc is 0
+ ;;
+ mov ar.lc = r30   // restore caller's loop count
+ mov pr = r29, -1   // restore all predicates (mask -1)
+ br.ret.sptk.few rp
+END(xor_ia64_3)
+// xor_ia64_4(in0, in1..in4): for each of (in0 >> 3) 8-byte words, store in1[i] ^ in2[i] ^ in3[i] ^ in4[i] into in1[i].
+GLOBAL_ENTRY(xor_ia64_4)
+ .prologue
+ .fframe 0
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 5, 0, 27, 32   // 5 inputs, 0 locals, 27 outputs; all 32 frame registers rotate
+ .save ar.lc, r30
+ mov r30 = ar.lc   // keep caller's loop count; restored before return
+ .save pr, r29
+ mov r29 = pr   // keep caller's predicates; restored before return
+ ;;
+ .body
+ mov r8 = in1   // r8 = store pointer: results overwrite the first buffer
+ mov ar.ec = 6 + 2   // epilogue count = 8 pipeline stages (p[0]..p[7])
+ shr in0 = in0, 3   // in0 / 8 = number of 8-byte words to process
+ ;;
+ adds in0 = -1, in0   // ar.lc wants iterations - 1; NOTE(review): in0 < 8 yields -1 here, presumably never passed
+ mov r16 = in1   // r16 = load pointer for source 1 (static reg; in* live in the rotating frame)
+ mov r17 = in2   // r17 = load pointer for source 2
+ ;;
+ mov r18 = in3   // r18 = load pointer for source 3
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16   // p16 (= p[0]) set, remaining rotating predicates cleared: only stage 0 active at start
+ mov r19 = in4   // r19 = load pointer for source 4
+ ;;
+ .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]   // rotating names: loaded words (7 deep each) and the result (2 deep)
+ .rotp p[6+2]   // rotating stage predicates p[0]..p[7]
+0:
+(p[0]) ld8.nta s1[0] = [r16], 8   // stage 0: load next word of source 1, advance pointer by 8
+(p[0]) ld8.nta s2[0] = [r17], 8   // stage 0: load next word of source 2, advance pointer by 8
+(p[6]) xor d[0] = s1[6], s2[6]   // stage 6: s1 ^ s2 for the words loaded 6 rotations ago
+(p[0]) ld8.nta s3[0] = [r18], 8   // stage 0: load next word of source 3, advance pointer by 8
+(p[0]) ld8.nta s4[0] = [r19], 8   // stage 0: load next word of source 4, advance pointer by 8
+(p[6]) xor r20 = s3[6], s4[6]   // stage 6: s3 ^ s4 into static scratch r20 (consumed in the next group)
+ ;;
+(p[6+1])st8.nta [r8] = d[1], 8   // stage 7: store the result completed on the previous rotation
+(p[6]) xor d[0] = d[0], r20   // stage 6: merge the two partial results
+ br.ctop.dptk.few 0b   // counted loop branch: rotates registers/predicates, drains stages via ar.ec once ar.lc is 0
+ ;;
+ mov ar.lc = r30   // restore caller's loop count
+ mov pr = r29, -1   // restore all predicates (mask -1)
+ br.ret.sptk.few rp
+END(xor_ia64_4)
+// xor_ia64_5(in0, in1..in5): for each of (in0 >> 3) 8-byte words, store in1[i] ^ in2[i] ^ in3[i] ^ in4[i] ^ in5[i] into in1[i].
+GLOBAL_ENTRY(xor_ia64_5)
+ .prologue
+ .fframe 0
+ .save ar.pfs, r31
+ alloc r31 = ar.pfs, 6, 0, 34, 40   // 6 inputs, 0 locals, 34 outputs; all 40 frame registers rotate
+ .save ar.lc, r30
+ mov r30 = ar.lc   // keep caller's loop count; restored before return
+ .save pr, r29
+ mov r29 = pr   // keep caller's predicates; restored before return
+ ;;
+ .body
+ mov r8 = in1   // r8 = store pointer: results overwrite the first buffer
+ mov ar.ec = 6 + 2   // epilogue count = 8 pipeline stages (p[0]..p[7])
+ shr in0 = in0, 3   // in0 / 8 = number of 8-byte words to process
+ ;;
+ adds in0 = -1, in0   // ar.lc wants iterations - 1; NOTE(review): in0 < 8 yields -1 here, presumably never passed
+ mov r16 = in1   // r16 = load pointer for source 1 (static reg; in* live in the rotating frame)
+ mov r17 = in2   // r17 = load pointer for source 2
+ ;;
+ mov r18 = in3   // r18 = load pointer for source 3
+ mov ar.lc = in0
+ mov pr.rot = 1 << 16   // p16 (= p[0]) set, remaining rotating predicates cleared: only stage 0 active at start
+ mov r19 = in4   // r19 = load pointer for source 4
+ mov r20 = in5   // r20 = load pointer for source 5
+ ;;
+ .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]   // rotating names: loaded words (7 deep each) and the result (2 deep)
+ .rotp p[6+2]   // rotating stage predicates p[0]..p[7]
+0:
+(p[0]) ld8.nta s1[0] = [r16], 8   // stage 0: load next word of source 1, advance pointer by 8
+(p[0]) ld8.nta s2[0] = [r17], 8   // stage 0: load next word of source 2, advance pointer by 8
+(p[6]) xor d[0] = s1[6], s2[6]   // stage 6: s1 ^ s2 for the words loaded 6 rotations ago
+(p[0]) ld8.nta s3[0] = [r18], 8   // stage 0: load next word of source 3, advance pointer by 8
+(p[0]) ld8.nta s4[0] = [r19], 8   // stage 0: load next word of source 4, advance pointer by 8
+(p[6]) xor r21 = s3[6], s4[6]   // stage 6: s3 ^ s4 into static scratch r21 (r20 is taken by the source-5 pointer)
+ ;;
+(p[0]) ld8.nta s5[0] = [r20], 8   // stage 0: load next word of source 5, advance pointer by 8
+(p[6+1])st8.nta [r8] = d[1], 8   // stage 7: store the result completed on the previous rotation
+(p[6]) xor d[0] = d[0], r21   // stage 6: merge the two partial results
+ ;;
+(p[6]) xor d[0] = d[0], s5[6]   // stage 6: fold in source 5 to finish this word's result
+ nop.f 0   // F-unit no-op filler
+ br.ctop.dptk.few 0b   // counted loop branch: rotates registers/predicates, drains stages via ar.ec once ar.lc is 0
+ ;;
+ mov ar.lc = r30   // restore caller's loop count
+ mov pr = r29, -1   // restore all predicates (mask -1)
+ br.ret.sptk.few rp
+END(xor_ia64_5)
diff -Nur --exclude=RCS --exclude=CVS --exclude=SCCS --exclude=BitKeeper --exclude=ChangeSet linux-ia64-2.4-orig/include/asm-ia64/xor.h linux-ia64-2.4/include/asm-ia64/xor.h
--- linux-ia64-2.4-orig/include/asm-ia64/xor.h Thu Oct 16 11:57:50 2003
+++ linux-ia64-2.4/include/asm-ia64/xor.h Thu Oct 16 09:18:08 2003
@@ -23,256 +23,6 @@
extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
unsigned long *, unsigned long *, unsigned long *);
-asm ("
- .text
-
- // Assume L2 memory latency of 6 cycles.
-
- .proc xor_ia64_2
-xor_ia64_2:
- .prologue
- .fframe 0
- { .mii
- .save ar.pfs, r31
- alloc r31 = ar.pfs, 3, 0, 13, 16
- .save ar.lc, r30
- mov r30 = ar.lc
- .save pr, r29
- mov r29 = pr
- ;;
- }
- .body
- { .mii
- mov r8 = in1
- mov ar.ec = 6 + 2
- shr in0 = in0, 3
- ;;
- }
- { .mmi
- adds in0 = -1, in0
- mov r16 = in1
- mov r17 = in2
- ;;
- }
- { .mii
- mov ar.lc = in0
- mov pr.rot = 1 << 16
- ;;
- }
- .rotr s1[6+1], s2[6+1], d[2]
- .rotp p[6+2]
-0: { .mmi
-(p[0]) ld8.nta s1[0] = [r16], 8
-(p[0]) ld8.nta s2[0] = [r17], 8
-(p[6]) xor d[0] = s1[6], s2[6]
- }
- { .mfb
-(p[6+1]) st8.nta [r8] = d[1], 8
- nop.f 0
- br.ctop.dptk.few 0b
- ;;
- }
- { .mii
- mov ar.lc = r30
- mov pr = r29, -1
- }
- { .bbb
- br.ret.sptk.few rp
- }
- .endp xor_ia64_2
-
- .proc xor_ia64_3
-xor_ia64_3:
- .prologue
- .fframe 0
- { .mii
- .save ar.pfs, r31
- alloc r31 = ar.pfs, 4, 0, 20, 24
- .save ar.lc, r30
- mov r30 = ar.lc
- .save pr, r29
- mov r29 = pr
- ;;
- }
- .body
- { .mii
- mov r8 = in1
- mov ar.ec = 6 + 2
- shr in0 = in0, 3
- ;;
- }
- { .mmi
- adds in0 = -1, in0
- mov r16 = in1
- mov r17 = in2
- ;;
- }
- { .mii
- mov r18 = in3
- mov ar.lc = in0
- mov pr.rot = 1 << 16
- ;;
- }
- .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
- .rotp p[6+2]
-0: { .mmi
-(p[0]) ld8.nta s1[0] = [r16], 8
-(p[0]) ld8.nta s2[0] = [r17], 8
-(p[6]) xor d[0] = s1[6], s2[6]
- ;;
- }
- { .mmi
-(p[0]) ld8.nta s3[0] = [r18], 8
-(p[6+1]) st8.nta [r8] = d[1], 8
-(p[6]) xor d[0] = d[0], s3[6]
- }
- { .bbb
- br.ctop.dptk.few 0b
- ;;
- }
- { .mii
- mov ar.lc = r30
- mov pr = r29, -1
- }
- { .bbb
- br.ret.sptk.few rp
- }
- .endp xor_ia64_3
-
- .proc xor_ia64_4
-xor_ia64_4:
- .prologue
- .fframe 0
- { .mii
- .save ar.pfs, r31
- alloc r31 = ar.pfs, 5, 0, 27, 32
- .save ar.lc, r30
- mov r30 = ar.lc
- .save pr, r29
- mov r29 = pr
- ;;
- }
- .body
- { .mii
- mov r8 = in1
- mov ar.ec = 6 + 2
- shr in0 = in0, 3
- ;;
- }
- { .mmi
- adds in0 = -1, in0
- mov r16 = in1
- mov r17 = in2
- ;;
- }
- { .mii
- mov r18 = in3
- mov ar.lc = in0
- mov pr.rot = 1 << 16
- }
- { .mfb
- mov r19 = in4
- ;;
- }
- .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
- .rotp p[6+2]
-0: { .mmi
-(p[0]) ld8.nta s1[0] = [r16], 8
-(p[0]) ld8.nta s2[0] = [r17], 8
-(p[6]) xor d[0] = s1[6], s2[6]
- }
- { .mmi
-(p[0]) ld8.nta s3[0] = [r18], 8
-(p[0]) ld8.nta s4[0] = [r19], 8
-(p[6]) xor r20 = s3[6], s4[6]
- ;;
- }
- { .mib
-(p[6+1]) st8.nta [r8] = d[1], 8
-(p[6]) xor d[0] = d[0], r20
- br.ctop.dptk.few 0b
- ;;
- }
- { .mii
- mov ar.lc = r30
- mov pr = r29, -1
- }
- { .bbb
- br.ret.sptk.few rp
- }
- .endp xor_ia64_4
-
- .proc xor_ia64_5
-xor_ia64_5:
- .prologue
- .fframe 0
- { .mii
- .save ar.pfs, r31
- alloc r31 = ar.pfs, 6, 0, 34, 40
- .save ar.lc, r30
- mov r30 = ar.lc
- .save pr, r29
- mov r29 = pr
- ;;
- }
- .body
- { .mii
- mov r8 = in1
- mov ar.ec = 6 + 2
- shr in0 = in0, 3
- ;;
- }
- { .mmi
- adds in0 = -1, in0
- mov r16 = in1
- mov r17 = in2
- ;;
- }
- { .mii
- mov r18 = in3
- mov ar.lc = in0
- mov pr.rot = 1 << 16
- }
- { .mib
- mov r19 = in4
- mov r20 = in5
- ;;
- }
- .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
- .rotp p[6+2]
-0: { .mmi
-(p[0]) ld8.nta s1[0] = [r16], 8
-(p[0]) ld8.nta s2[0] = [r17], 8
-(p[6]) xor d[0] = s1[6], s2[6]
- }
- { .mmi
-(p[0]) ld8.nta s3[0] = [r18], 8
-(p[0]) ld8.nta s4[0] = [r19], 8
-(p[6]) xor r21 = s3[6], s4[6]
- ;;
- }
- { .mmi
-(p[0]) ld8.nta s5[0] = [r20], 8
-(p[6+1]) st8.nta [r8] = d[1], 8
-(p[6]) xor d[0] = d[0], r21
- ;;
- }
- { .mfb
-(p[6]) xor d[0] = d[0], s5[6]
- nop.f 0
- br.ctop.dptk.few 0b
- ;;
- }
- { .mii
- mov ar.lc = r30
- mov pr = r29, -1
- }
- { .bbb
- br.ret.sptk.few rp
- }
- .endp xor_ia64_5
-");
-
static struct xor_block_template xor_block_ia64 = {
name: "ia64",
do_2: xor_ia64_2,
reply other threads:[~2003-10-16 19:16 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=marc-linux-ia64-106633200522585@msgid-missing \
--to=iod00d@hp.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.