All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andi Kleen <ak@muc.de>
To: linux-kernel@vger.kernel.org
Subject: [PATCH] Call for testers - fixed RAID5 XOR functions
Date: Wed, 5 Jun 2002 15:38:34 +0200	[thread overview]
Message-ID: <20020605153834.A15485@averell> (raw)


While porting xor.h to x86-64 I noticed that the accelerated RAID-5 XOR
functions for i386 used incorrect inline assembly. The were clobbering
upto 7 registers without gcc telling it. It is pure luck that it worked
so far, although I'm dubious about it for the versions with 4 and 5 inputs
(has that been really tested? - they clobber 5 or 6 registers and unless it 
was pure luck that the caller had some dead registers in one particular
compiler output it must have caused data corruptions) 

I unfortunately don't have a RAID-5 so I cannot test. From visual inspection
the new assembly seems to be correct. I would be grateful if someone with
RAID-5 could test it by doing some writes and then removing one disk and
testing recovery. Best would be tests with 3,4,5,6 disks, but only specific
cases will do also. 

I did also align the XMM saves on 16 bytes to save a few cycles.

Here is the patch for 2.4.19pre9 (but should apply to most 2.4 and 2.5) 

-Andi

--- linux-2.4.19pre9/include/asm-i386/xor.h	Fri Sep 14 23:25:21 2001
+++ linux-2.4.19pre9-work/include/asm-i386/xor.h	Wed Jun  5 15:06:40 2002
@@ -76,9 +76,9 @@
 	"       addl $128, %2         ;\n"
 	"       decl %0               ;\n"
 	"       jnz 1b                ;\n"
-       	:
-	: "r" (lines),
-	  "r" (p1), "r" (p2)
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2)
+	:
 	: "memory");
 
 	FPU_RESTORE;
@@ -126,9 +126,9 @@
 	"       addl $128, %3         ;\n"
 	"       decl %0               ;\n"
 	"       jnz 1b                ;\n"
-       	:
-	: "r" (lines),
-	  "r" (p1), "r" (p2), "r" (p3)
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3)
+	:
 	: "memory");
 
 	FPU_RESTORE;
@@ -181,14 +181,15 @@
 	"       addl $128, %4         ;\n"
 	"       decl %0               ;\n"
 	"       jnz 1b                ;\n"
-       	:
-	: "r" (lines),
-	  "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
+	:
 	: "memory");
 
 	FPU_RESTORE;
 }
 
+
 static void
 xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
@@ -198,7 +199,11 @@
 
 	FPU_SAVE;
 
+	/* need to save/restore p4/p5 manually otherwise gcc's 10 argument
+	   limit gets exceeded (+ counts as two arguments) */
 	__asm__ __volatile__ (
+		"  pushl %4\n"
+		"  pushl %5\n"
 #undef BLOCK
 #define BLOCK(i) \
 	LD(i,0)					\
@@ -241,9 +246,11 @@
 	"       addl $128, %5         ;\n"
 	"       decl %0               ;\n"
 	"       jnz 1b                ;\n"
-       	:
-	: "g" (lines),
-	  "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+	"	popl %5\n"
+	"	popl %4\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3)
+	: "r" (p4), "r" (p5) 
 	: "memory");
 
 	FPU_RESTORE;
@@ -297,9 +304,9 @@
 	"       addl $64, %2         ;\n"
 	"       decl %0              ;\n"
 	"       jnz 1b               ;\n"
-	: 
-	: "r" (lines),
-	  "r" (p1), "r" (p2)
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2)
+	:
 	: "memory");
 
 	FPU_RESTORE;
@@ -355,9 +362,9 @@
 	"       addl $64, %3         ;\n"
 	"       decl %0              ;\n"
 	"       jnz 1b               ;\n"
-	: 
-	: "r" (lines),
-	  "r" (p1), "r" (p2), "r" (p3)
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3)
+	:
 	: "memory" );
 
 	FPU_RESTORE;
@@ -422,9 +429,9 @@
 	"       addl $64, %4         ;\n"
 	"       decl %0              ;\n"
 	"       jnz 1b               ;\n"
-	: 
-	: "r" (lines),
-	  "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
+	:
 	: "memory");
 
 	FPU_RESTORE;
@@ -439,7 +446,10 @@
 
 	FPU_SAVE;
 
+	/* need to save p4/p5 manually to not exceed gcc's 10 argument limit */
 	__asm__ __volatile__ (
+	"	pushl %4\n"
+	"	pushl %5\n"        	
 	" .align 32,0x90             ;\n"
 	" 1:                         ;\n"
 	"       movq   (%1), %%mm0   ;\n"
@@ -498,9 +508,11 @@
 	"       addl $64, %5         ;\n"
 	"       decl %0              ;\n"
 	"       jnz 1b               ;\n"
-	: 
-	: "g" (lines),
-	  "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+	"	popl %5\n"
+	"	popl %4\n"
+	: "+g" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3)
+	: "r" (p4), "r" (p5)
 	: "memory");
 
 	FPU_RESTORE;
@@ -554,6 +566,8 @@
 		: "r" (cr0), "r" (xmm_save)	\
 		: "memory")
 
+#define ALIGN16 __attribute__((aligned(16)))
+
 #define OFFS(x)		"16*("#x")"
 #define PF_OFFS(x)	"256+16*("#x")"
 #define	PF0(x)		"	prefetchnta "PF_OFFS(x)"(%1)		;\n"
@@ -575,7 +589,7 @@
 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 {
         unsigned long lines = bytes >> 8;
-	char xmm_save[16*4];
+	char xmm_save[16*4] ALIGN16;
 	int cr0;
 
 	XMMS_SAVE;
@@ -616,9 +630,9 @@
         "       addl $256, %2           ;\n"
         "       decl %0                 ;\n"
         "       jnz 1b                  ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2)
 	:
-	: "r" (lines),
-	  "r" (p1), "r" (p2)
         : "memory");
 
 	XMMS_RESTORE;
@@ -629,7 +643,7 @@
 	  unsigned long *p3)
 {
         unsigned long lines = bytes >> 8;
-	char xmm_save[16*4];
+	char xmm_save[16*4] ALIGN16;
 	int cr0;
 
 	XMMS_SAVE;
@@ -677,9 +691,9 @@
         "       addl $256, %3           ;\n"
         "       decl %0                 ;\n"
         "       jnz 1b                  ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r"(p2), "+r"(p3)
 	:
-	: "r" (lines),
-	  "r" (p1), "r"(p2), "r"(p3)
         : "memory" );
 
 	XMMS_RESTORE;
@@ -690,7 +704,7 @@
 	  unsigned long *p3, unsigned long *p4)
 {
         unsigned long lines = bytes >> 8;
-	char xmm_save[16*4];
+	char xmm_save[16*4] ALIGN16;
 	int cr0;
 
 	XMMS_SAVE;
@@ -745,9 +759,9 @@
         "       addl $256, %4           ;\n"
         "       decl %0                 ;\n"
         "       jnz 1b                  ;\n"
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
 	:
-	: "r" (lines),
-	  "r" (p1), "r" (p2), "r" (p3), "r" (p4)
         : "memory" );
 
 	XMMS_RESTORE;
@@ -758,12 +772,15 @@
 	  unsigned long *p3, unsigned long *p4, unsigned long *p5)
 {
         unsigned long lines = bytes >> 8;
-	char xmm_save[16*4];
+	char xmm_save[16*4] ALIGN16;
 	int cr0;
 
 	XMMS_SAVE;
 
+	/* need to save p4/p5 manually to not exceed gcc's 10 argument limit */
         __asm__ __volatile__ (
+		" pushl %4\n"
+		" pushl %5\n"
 #undef BLOCK
 #define BLOCK(i) \
 		PF1(i)					\
@@ -820,9 +837,11 @@
         "       addl $256, %5           ;\n"
         "       decl %0                 ;\n"
         "       jnz 1b                  ;\n"
-	:
-	: "r" (lines),
-	  "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+	"	popl %5\n"	
+	"	popl %4\n"	
+	: "+r" (lines),
+	  "+r" (p1), "+r" (p2), "+r" (p3)
+	: "r" (p4), "r" (p5)
 	: "memory");
 
 	XMMS_RESTORE;


                 reply	other threads:[~2002-06-05 13:38 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20020605153834.A15485@averell \
    --to=ak@muc.de \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.