linux-raid.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* lib/raid6: SSSE3 optimized recovery functions
@ 2012-03-16 23:06 Jim Kukunas
  2012-03-16 23:06 ` [PATCH 1/3] lib/raid6: Add " Jim Kukunas
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Jim Kukunas @ 2012-03-16 23:06 UTC (permalink / raw)
  To: linux-raid; +Cc: hpa

Hi Folks,

The following patchset adds SSSE3 optimized recovery
functions to RAID6.

A technical description of the algorithm can be found
at http://www.kernel.org/pub/linux/kernel/people/hpa/raid6.pdf

Thanks.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] lib/raid6: Add SSSE3 optimized recovery functions.
  2012-03-16 23:06 lib/raid6: SSSE3 optimized recovery functions Jim Kukunas
@ 2012-03-16 23:06 ` Jim Kukunas
  2012-03-16 23:06 ` [PATCH 2/3] lib/raid6: update test program " Jim Kukunas
  2012-03-16 23:06 ` [PATCH 3/3] lib/raid6: cleanup gen_syndrome function selection Jim Kukunas
  2 siblings, 0 replies; 7+ messages in thread
From: Jim Kukunas @ 2012-03-16 23:06 UTC (permalink / raw)
  To: linux-raid; +Cc: hpa

Add SSSE3 optimized recovery functions, as well as a system
for selecting the most appropriate recovery functions to use.

Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Originally-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/raid/pq.h |   18 +++-
 lib/raid6/algos.c       |   38 ++++++
 lib/raid6/mktables.c    |   25 ++++
 lib/raid6/recov.c       |  341 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 416 insertions(+), 6 deletions(-)

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 53272e9..640c69c 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -99,8 +99,20 @@ extern const struct raid6_calls raid6_altivec2;
 extern const struct raid6_calls raid6_altivec4;
 extern const struct raid6_calls raid6_altivec8;
 
+struct raid6_recov_calls {
+	void (*data2)(int, size_t, int, int, void **);
+	void (*datap)(int, size_t, int, void **);
+	int  (*valid)(void);
+	const char *name;
+	int priority;
+};
+
+extern const struct raid6_recov_calls raid6_recov_intx1;
+extern const struct raid6_recov_calls raid6_recov_ssse3;
+
 /* Algorithm list */
 extern const struct raid6_calls * const raid6_algos[];
+extern const struct raid6_recov_calls *const raid6_recov_algos[];
 int raid6_select_algo(void);
 
 /* Return values from chk_syndrome */
@@ -111,14 +123,16 @@ int raid6_select_algo(void);
 
 /* Galois field tables */
 extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256)));
+extern const u8 raid6_vgfmul[256][32] __attribute__((aligned(256)));
 extern const u8 raid6_gfexp[256]      __attribute__((aligned(256)));
 extern const u8 raid6_gfinv[256]      __attribute__((aligned(256)));
 extern const u8 raid6_gfexi[256]      __attribute__((aligned(256)));
 
 /* Recovery routines */
-void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+extern void (*raid6_2data_recov)(int disks, size_t bytes, int faila, int failb,
 		       void **ptrs);
-void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs);
+extern void (*raid6_datap_recov)(int disks, size_t bytes, int faila,
+			void **ptrs);
 void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
 		      void **ptrs);
 
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 8b02f60..a3ac58a 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -18,6 +18,7 @@
 
 #include <linux/raid/pq.h>
 #include <linux/module.h>
+#include "x86.h"
 #ifndef __KERNEL__
 #include <sys/mman.h>
 #include <stdio.h>
@@ -64,6 +65,20 @@ const struct raid6_calls * const raid6_algos[] = {
 	NULL
 };
 
+void (*raid6_2data_recov)(int, size_t, int, int, void **);
+EXPORT_SYMBOL_GPL(raid6_2data_recov);
+
+void (*raid6_datap_recov)(int, size_t, int, void **);
+EXPORT_SYMBOL_GPL(raid6_datap_recov);
+
+const struct raid6_recov_calls *const raid6_recov_algos[] = {
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+	&raid6_recov_ssse3,
+#endif
+	&raid6_recov_intx1,
+	NULL
+};
+
 #ifdef __KERNEL__
 #define RAID6_TIME_JIFFIES_LG2	4
 #else
@@ -72,6 +87,26 @@ const struct raid6_calls * const raid6_algos[] = {
 #define time_before(x, y) ((x) < (y))
 #endif
 
+static inline void raid6_choose_recov(void)
+{
+	const struct raid6_recov_calls *const *algo;
+	const struct raid6_recov_calls *best;
+
+	for (best = NULL, algo = raid6_recov_algos; *algo; algo++)
+		if (!best || (*algo)->priority > best->priority)
+			if (!(*algo)->valid || (*algo)->valid())
+				best = *algo;
+
+	if (best) {
+		raid6_2data_recov = best->data2;
+		raid6_datap_recov = best->datap;
+
+		printk("raid6: using %s recovery algorithm\n", best->name);
+	} else
+		printk("raid6: Yikes! No recovery algorithm found!\n");
+}
+
+
 /* Try to pick the best algorithm */
 /* This code uses the gfmul table as convenient data set to abuse */
 
@@ -141,6 +176,9 @@ int __init raid6_select_algo(void)
 
 	free_pages((unsigned long)syndromes, 1);
 
+	/* select raid recover functions */
+	raid6_choose_recov();
+
 	return best ? 0 : -EINVAL;
 }
 
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
index 8a37809..39787db 100644
--- a/lib/raid6/mktables.c
+++ b/lib/raid6/mktables.c
@@ -81,6 +81,31 @@ int main(int argc, char *argv[])
 	printf("EXPORT_SYMBOL(raid6_gfmul);\n");
 	printf("#endif\n");
 
+	/* Compute vector multiplication table */
+	printf("\nconst u8  __attribute__((aligned(256)))\n"
+		"raid6_vgfmul[256][32] =\n"
+		"{\n");
+	for (i = 0; i < 256; i++) {
+		printf("\t{\n");
+		for (j = 0; j < 16; j += 8) {
+			printf("\t\t");
+			for (k = 0; k < 8; k++)
+				printf("0x%02x,%c", gfmul(i, j + k),
+				       (k == 7) ? '\n' : ' ');
+		}
+		for (j = 0; j < 16; j += 8) {
+			printf("\t\t");
+			for (k = 0; k < 8; k++)
+				printf("0x%02x,%c", gfmul(i, (j + k) << 4),
+				       (k == 7) ? '\n' : ' ');
+		}
+		printf("\t},\n");
+	}
+	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_vgfmul);\n");
+	printf("#endif\n");
+
 	/* Compute power-of-2 table (exponent) */
 	v = 1;
 	printf("\nconst u8 __attribute__((aligned(256)))\n"
diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c
index fe275d7..ebaa307 100644
--- a/lib/raid6/recov.c
+++ b/lib/raid6/recov.c
@@ -20,9 +20,17 @@
 
 #include <linux/export.h>
 #include <linux/raid/pq.h>
+#include "x86.h"
+
+static int raid6_has_ssse3(void)
+{
+	return boot_cpu_has(X86_FEATURE_XMM) &&
+		boot_cpu_has(X86_FEATURE_XMM2) &&
+		boot_cpu_has(X86_FEATURE_SSSE3);
+}
 
 /* Recover two failed data blocks. */
-void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb,
 		       void **ptrs)
 {
 	u8 *p, *q, *dp, *dq;
@@ -64,10 +72,9 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
 		p++; q++;
 	}
 }
-EXPORT_SYMBOL_GPL(raid6_2data_recov);
 
 /* Recover failure of one data block plus the P block */
-void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
+void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs)
 {
 	u8 *p, *q, *dq;
 	const u8 *qmul;		/* Q multiplier table */
@@ -96,7 +103,333 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
 		q++; dq++;
 	}
 }
-EXPORT_SYMBOL_GPL(raid6_datap_recov);
+
+
+const struct raid6_recov_calls raid6_recov_intx1 = {
+	.data2 = raid6_2data_recov_intx1,
+	.datap = raid6_datap_recov_intx1,
+	.valid = NULL,
+	.name = "intx1",
+	.priority = 0,
+};
+
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+
+void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb,
+		       void **ptrs)
+{
+	u8 *p, *q, *dp, *dq;
+	const u8 *pbmul;	/* P multiplier table for B data */
+	const u8 *qmul;		/* Q multiplier table (for both) */
+	static const u8 __attribute__((aligned(16))) x0f[16] = {
+		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data pages
+	   Use the dead data pages as temporary storage for
+	   delta p and delta q */
+	dp = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-2] = dp;
+	dq = (u8 *)ptrs[failb];
+	ptrs[failb] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dp;
+	ptrs[failb]   = dq;
+	ptrs[disks-2] = p;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
+		raid6_gfexp[failb]]];
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0]));
+
+#ifdef CONFIG_X86_64
+	asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0]));
+	asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0]));
+	asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16]));
+#endif
+
+	/* Now do it... */
+	while (bytes) {
+#ifdef CONFIG_X86_64
+	/* xmm6, xmm14, xmm15 */
+
+		asm volatile("movdqa %0,%%xmm1" : : "m" (q[0]));
+		asm volatile("movdqa %0,%%xmm9" : : "m" (q[16]));
+		asm volatile("movdqa %0,%%xmm0" : : "m" (p[0]));
+		asm volatile("movdqa %0,%%xmm8" : : "m" (p[16]));
+		asm volatile("pxor   %0,%%xmm1" : : "m" (dq[0]));
+		asm volatile("pxor   %0,%%xmm9" : : "m" (dq[16]));
+		asm volatile("pxor   %0,%%xmm0" : : "m" (dp[0]));
+		asm volatile("pxor   %0,%%xmm8" : : "m" (dp[16]));
+
+		/* xmm0/8 = px */
+
+		asm volatile("movdqa %xmm6,%xmm4");
+		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));
+		asm volatile("movdqa %xmm6,%xmm12");
+		asm volatile("movdqa %0,%%xmm13" : : "m" (qmul[16]));
+		asm volatile("movdqa %xmm1,%xmm3");
+		asm volatile("movdqa %xmm9,%xmm11");
+		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */
+		asm volatile("movdqa %xmm8,%xmm10");
+		asm volatile("psraw  $4,%xmm1");
+		asm volatile("psraw  $4,%xmm9");
+		asm volatile("pand   %xmm7,%xmm3");
+		asm volatile("pand   %xmm7,%xmm11");
+		asm volatile("pand   %xmm7,%xmm1");
+		asm volatile("pand   %xmm7,%xmm9");
+		asm volatile("pshufb %xmm3,%xmm4");
+		asm volatile("pshufb %xmm11,%xmm12");
+		asm volatile("pshufb %xmm1,%xmm5");
+		asm volatile("pshufb %xmm9,%xmm13");
+		asm volatile("pxor   %xmm4,%xmm5");
+		asm volatile("pxor   %xmm12,%xmm13");
+
+		/* xmm5/13 = qx */
+
+		asm volatile("movdqa %xmm14,%xmm4");
+		asm volatile("movdqa %xmm15,%xmm1");
+		asm volatile("movdqa %xmm14,%xmm12");
+		asm volatile("movdqa %xmm15,%xmm9");
+		asm volatile("movdqa %xmm2,%xmm3");
+		asm volatile("movdqa %xmm10,%xmm11");
+		asm volatile("psraw  $4,%xmm2");
+		asm volatile("psraw  $4,%xmm10");
+		asm volatile("pand   %xmm7,%xmm3");
+		asm volatile("pand   %xmm7,%xmm11");
+		asm volatile("pand   %xmm7,%xmm2");
+		asm volatile("pand   %xmm7,%xmm10");
+		asm volatile("pshufb %xmm3,%xmm4");
+		asm volatile("pshufb %xmm11,%xmm12");
+		asm volatile("pshufb %xmm2,%xmm1");
+		asm volatile("pshufb %xmm10,%xmm9");
+		asm volatile("pxor   %xmm4,%xmm1");
+		asm volatile("pxor   %xmm12,%xmm9");
+
+		/* xmm1/9 = pbmul[px] */
+		asm volatile("pxor   %xmm5,%xmm1");
+		asm volatile("pxor   %xmm13,%xmm9");
+		/* xmm1/9 = db = DQ */
+		asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0]));
+		asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16]));
+
+		asm volatile("pxor   %xmm1,%xmm0");
+		asm volatile("pxor   %xmm9,%xmm8");
+		asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0]));
+		asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16]));
+
+		bytes -= 32;
+		p += 32;
+		q += 32;
+		dp += 32;
+		dq += 32;
+#else
+		asm volatile("movdqa %0,%%xmm1" : : "m" (*q));
+		asm volatile("movdqa %0,%%xmm0" : : "m" (*p));
+		asm volatile("pxor   %0,%%xmm1" : : "m" (*dq));
+		asm volatile("pxor   %0,%%xmm0" : : "m" (*dp));
+
+		/* 1 = dq ^ q
+		 * 0 = dp ^ p
+		 */
+		asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0]));
+		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));
+
+		asm volatile("movdqa %xmm1,%xmm3");
+		asm volatile("psraw  $4,%xmm1");
+		asm volatile("pand   %xmm7,%xmm3");
+		asm volatile("pand   %xmm7,%xmm1");
+		asm volatile("pshufb %xmm3,%xmm4");
+		asm volatile("pshufb %xmm1,%xmm5");
+		asm volatile("pxor   %xmm4,%xmm5");
+
+		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */
+
+		/* xmm5 = qx */
+
+		asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0]));
+		asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16]));
+		asm volatile("movdqa %xmm2,%xmm3");
+		asm volatile("psraw  $4,%xmm2");
+		asm volatile("pand   %xmm7,%xmm3");
+		asm volatile("pand   %xmm7,%xmm2");
+		asm volatile("pshufb %xmm3,%xmm4");
+		asm volatile("pshufb %xmm2,%xmm1");
+		asm volatile("pxor   %xmm4,%xmm1");
+
+		/* xmm1 = pbmul[px] */
+		asm volatile("pxor   %xmm5,%xmm1");
+		/* xmm1 = db = DQ */
+		asm volatile("movdqa %%xmm1,%0" : "=m" (*dq));
+
+		asm volatile("pxor   %xmm1,%xmm0");
+		asm volatile("movdqa %%xmm0,%0" : "=m" (*dp));
+
+		bytes -= 16;
+		p += 16;
+		q += 16;
+		dp += 16;
+		dq += 16;
+#endif
+	}
+
+	kernel_fpu_end();
+}
+
+
+void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, void **ptrs)
+{
+	u8 *p, *q, *dq;
+	const u8 *qmul;		/* Q multiplier table */
+	static const u8 __attribute__((aligned(16))) x0f[16] = {
+		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data page
+	   Use the dead data page as temporary storage for delta q */
+	dq = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dq;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0]));
+
+	while (bytes) {
+#ifdef CONFIG_X86_64
+		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
+		asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16]));
+		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
+		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));
+
+		/* xmm3 = q[0] ^ dq[0] */
+
+		asm volatile("pxor %0, %%xmm4" : : "m" (q[16]));
+		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));
+
+		/* xmm4 = q[16] ^ dq[16] */
+
+		asm volatile("movdqa %xmm3, %xmm6");
+		asm volatile("movdqa %xmm4, %xmm8");
+
+		/* xmm4 = xmm8 = q[16] ^ dq[16] */
+
+		asm volatile("psraw $4, %xmm3");
+		asm volatile("pand %xmm7, %xmm6");
+		asm volatile("pand %xmm7, %xmm3");
+		asm volatile("pshufb %xmm6, %xmm0");
+		asm volatile("pshufb %xmm3, %xmm1");
+		asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0]));
+		asm volatile("pxor %xmm0, %xmm1");
+		asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16]));
+
+		/* xmm1 = qmul[q[0] ^ dq[0]] */
+
+		asm volatile("psraw $4, %xmm4");
+		asm volatile("pand %xmm7, %xmm8");
+		asm volatile("pand %xmm7, %xmm4");
+		asm volatile("pshufb %xmm8, %xmm10");
+		asm volatile("pshufb %xmm4, %xmm11");
+		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
+		asm volatile("pxor %xmm10, %xmm11");
+		asm volatile("movdqa %0, %%xmm12" : : "m" (p[16]));
+
+		/* xmm11 = qmul[q[16] ^ dq[16]] */
+
+		asm volatile("pxor %xmm1, %xmm2");
+
+		/* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */
+
+		asm volatile("pxor %xmm11, %xmm12");
+
+		/* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */
+
+		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
+		asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16]));
+
+		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));
+		asm volatile("movdqa %%xmm12, %0" : "=m" (p[16]));
+
+		bytes -= 32;
+		p += 32;
+		q += 32;
+		dq += 32;
+
+#else
+		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
+		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));
+		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
+		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));
+
+		/* xmm3 = *q ^ *dq */
+
+		asm volatile("movdqa %xmm3, %xmm6");
+		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
+		asm volatile("psraw $4, %xmm3");
+		asm volatile("pand %xmm7, %xmm6");
+		asm volatile("pand %xmm7, %xmm3");
+		asm volatile("pshufb %xmm6, %xmm0");
+		asm volatile("pshufb %xmm3, %xmm1");
+		asm volatile("pxor %xmm0, %xmm1");
+
+		/* xmm1 = qmul[*q ^ *dq */
+
+		asm volatile("pxor %xmm1, %xmm2");
+
+		/* xmm2 = *p ^ qmul[*q ^ *dq] */
+
+		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
+		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));
+
+		bytes -= 16;
+		p += 16;
+		q += 16;
+		dq += 16;
+#endif
+	}
+
+	kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_ssse3 = {
+	.data2 = raid6_2data_recov_ssse3,
+	.datap = raid6_datap_recov_ssse3,
+	.valid = raid6_has_ssse3,
+#ifdef CONFIG_X86_64
+	.name = "ssse3x2",
+#else
+	.name = "ssse3x1",
+#endif
+	.priority = 1,
+};
+
+#endif
+
 
 #ifndef __KERNEL__
 /* Testing only */
-- 
1.7.3.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] lib/raid6: update test program recovery functions
  2012-03-16 23:06 lib/raid6: SSSE3 optimized recovery functions Jim Kukunas
  2012-03-16 23:06 ` [PATCH 1/3] lib/raid6: Add " Jim Kukunas
@ 2012-03-16 23:06 ` Jim Kukunas
  2012-03-17  8:52   ` Paul Menzel
  2012-03-16 23:06 ` [PATCH 3/3] lib/raid6: cleanup gen_syndrome function selection Jim Kukunas
  2 siblings, 1 reply; 7+ messages in thread
From: Jim Kukunas @ 2012-03-16 23:06 UTC (permalink / raw)
  To: linux-raid; +Cc: hpa

Test each combination of recovery and syndrome generation
functions.

Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
---
 lib/raid6/algos.c     |    2 +-
 lib/raid6/test/test.c |   32 +++++++++++++++++++++-----------
 lib/raid6/x86.h       |   13 ++++++++-----
 3 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index a3ac58a..82cdd01 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -17,12 +17,12 @@
  */
 
 #include <linux/raid/pq.h>
-#include <linux/module.h>
 #include "x86.h"
 #ifndef __KERNEL__
 #include <sys/mman.h>
 #include <stdio.h>
 #else
+#include <linux/module.h>
 #include <linux/gfp.h>
 #if !RAID6_USE_EMPTY_ZERO_PAGE
 /* In .bss so it's zeroed */
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 7a93031..5a485b7 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -90,25 +90,35 @@ static int test_disks(int i, int j)
 int main(int argc, char *argv[])
 {
 	const struct raid6_calls *const *algo;
+	const struct raid6_recov_calls *const *ra;
 	int i, j;
 	int err = 0;
 
 	makedata();
 
-	for (algo = raid6_algos; *algo; algo++) {
-		if (!(*algo)->valid || (*algo)->valid()) {
-			raid6_call = **algo;
+	for (ra = raid6_recov_algos; *ra; ra++) {
+		if ((*ra)->valid  && !(*ra)->valid())
+			continue;
+		raid6_2data_recov = (*ra)->data2;
+		raid6_datap_recov = (*ra)->datap;
 
-			/* Nuke syndromes */
-			memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+		printf("using recovery %s\n", (*ra)->name);
 
-			/* Generate assumed good syndrome */
-			raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
-						(void **)&dataptrs);
+		for (algo = raid6_algos; *algo; algo++) {
+			if (!(*algo)->valid || (*algo)->valid()) {
+				raid6_call = **algo;
 
-			for (i = 0; i < NDISKS-1; i++)
-				for (j = i+1; j < NDISKS; j++)
-					err += test_disks(i, j);
+				/* Nuke syndromes */
+				memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+				/* Generate assumed good syndrome */
+				raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+							(void **)&dataptrs);
+
+				for (i = 0; i < NDISKS-1; i++)
+					for (j = i+1; j < NDISKS; j++)
+						err += test_disks(i, j);
+			}
 		}
 		printf("\n");
 	}
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index cb2a8c9..9aff51f 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -40,19 +40,22 @@ static inline void kernel_fpu_end(void)
 					   * (fast save and restore) */
 #define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
 #define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_SSSE3	(4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_AVX	(4*32+28) /* Advanced Vector Extensions */
 #define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
 
 /* Should work well enough on modern CPUs for testing */
 static inline int boot_cpu_has(int flag)
 {
-	u32 eax = (flag >> 5) ? 0x80000001 : 1;
-	u32 edx;
+	u32 eax = (flag & 0x20) ? 0x80000001 : 1;
+	u32 ecx, edx;
 
 	asm volatile("cpuid"
-		     : "+a" (eax), "=d" (edx)
-		     : : "ecx", "ebx");
+		     : "+a" (eax), "=d" (edx), "=c" (ecx)
+		     : : "ebx");
 
-	return (edx >> (flag & 31)) & 1;
+	return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1;
 }
 
 #endif /* ndef __KERNEL__ */
-- 
1.7.3.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] lib/raid6: cleanup gen_syndrome function selection
  2012-03-16 23:06 lib/raid6: SSSE3 optimized recovery functions Jim Kukunas
  2012-03-16 23:06 ` [PATCH 1/3] lib/raid6: Add " Jim Kukunas
  2012-03-16 23:06 ` [PATCH 2/3] lib/raid6: update test program " Jim Kukunas
@ 2012-03-16 23:06 ` Jim Kukunas
  2 siblings, 0 replies; 7+ messages in thread
From: Jim Kukunas @ 2012-03-16 23:06 UTC (permalink / raw)
  To: linux-raid; +Cc: hpa

Reorders functions in raid6_algos as well as preference check
to reduce the number of functions tested on initialization.

Also, creates symmetry between choosing the gen_syndrome functions
and choosing the recovery functions.

Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
---
 lib/raid6/algos.c |  104 +++++++++++++++++++++++++++++------------------------
 1 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 82cdd01..f035942 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -35,10 +35,6 @@ struct raid6_calls raid6_call;
 EXPORT_SYMBOL_GPL(raid6_call);
 
 const struct raid6_calls * const raid6_algos[] = {
-	&raid6_intx1,
-	&raid6_intx2,
-	&raid6_intx4,
-	&raid6_intx8,
 #if defined(__ia64__)
 	&raid6_intx16,
 	&raid6_intx32,
@@ -62,6 +58,10 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_altivec4,
 	&raid6_altivec8,
 #endif
+	&raid6_intx1,
+	&raid6_intx2,
+	&raid6_intx4,
+	&raid6_intx8,
 	NULL
 };
 
@@ -87,7 +87,7 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #define time_before(x, y) ((x) < (y))
 #endif
 
-static inline void raid6_choose_recov(void)
+static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 {
 	const struct raid6_recov_calls *const *algo;
 	const struct raid6_recov_calls *best;
@@ -104,62 +104,38 @@ static inline void raid6_choose_recov(void)
 		printk("raid6: using %s recovery algorithm\n", best->name);
 	} else
 		printk("raid6: Yikes! No recovery algorithm found!\n");
-}
-
 
-/* Try to pick the best algorithm */
-/* This code uses the gfmul table as convenient data set to abuse */
+	return best;
+}
 
-int __init raid6_select_algo(void)
+static inline const struct raid6_calls *raid6_choose_gen(
+	void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
 {
-	const struct raid6_calls * const * algo;
-	const struct raid6_calls * best;
-	char *syndromes;
-	void *dptrs[(65536/PAGE_SIZE)+2];
-	int i, disks;
-	unsigned long perf, bestperf;
-	int bestprefer;
-	unsigned long j0, j1;
-
-	disks = (65536/PAGE_SIZE)+2;
-	for ( i = 0 ; i < disks-2 ; i++ ) {
-		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
-	}
-
-	/* Normal code - use a 2-page allocation to avoid D$ conflict */
-	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
-
-	if ( !syndromes ) {
-		printk("raid6: Yikes!  No memory available.\n");
-		return -ENOMEM;
-	}
-
-	dptrs[disks-2] = syndromes;
-	dptrs[disks-1] = syndromes + PAGE_SIZE;
+	unsigned long perf, bestperf, j0, j1;
+	const struct raid6_calls *const *algo;
+	const struct raid6_calls *best;
 
-	bestperf = 0;  bestprefer = 0;  best = NULL;
+	for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+		if (!best || (*algo)->prefer >= best->prefer) {
+			if ((*algo)->valid && !(*algo)->valid())
+				continue;
 
-	for ( algo = raid6_algos ; *algo ; algo++ ) {
-		if ( !(*algo)->valid || (*algo)->valid() ) {
 			perf = 0;
 
 			preempt_disable();
 			j0 = jiffies;
-			while ( (j1 = jiffies) == j0 )
+			while ((j1 = jiffies) == j0)
 				cpu_relax();
 			while (time_before(jiffies,
 					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
-				(*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
+				(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
 				perf++;
 			}
 			preempt_enable();
 
-			if ( (*algo)->prefer > bestprefer ||
-			     ((*algo)->prefer == bestprefer &&
-			      perf > bestperf) ) {
-				best = *algo;
-				bestprefer = best->prefer;
+			if (perf > bestperf) {
 				bestperf = perf;
+				best = *algo;
 			}
 			printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,
 			       (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
@@ -174,12 +150,46 @@ int __init raid6_select_algo(void)
 	} else
 		printk("raid6: Yikes!  No algorithm found!\n");
 
-	free_pages((unsigned long)syndromes, 1);
+	return best;
+}
+
+
+/* Try to pick the best algorithm */
+/* This code uses the gfmul table as convenient data set to abuse */
+
+int __init raid6_select_algo(void)
+{
+	const int disks = (65536/PAGE_SIZE)+2;
+
+	const struct raid6_calls *gen_best;
+	const struct raid6_recov_calls *rec_best;
+	char *syndromes;
+	void *dptrs[(65536/PAGE_SIZE)+2];
+	int i;
+
+	for (i = 0; i < disks-2; i++)
+		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
+
+	/* Normal code - use a 2-page allocation to avoid D$ conflict */
+	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
+
+	if (!syndromes) {
+		printk("raid6: Yikes!  No memory available.\n");
+		return -ENOMEM;
+	}
+
+	dptrs[disks-2] = syndromes;
+	dptrs[disks-1] = syndromes + PAGE_SIZE;
+
+	/* select raid gen_syndrome function */
+	gen_best = raid6_choose_gen(&dptrs, disks);
 
 	/* select raid recover functions */
-	raid6_choose_recov();
+	rec_best = raid6_choose_recov();
+
+	free_pages((unsigned long)syndromes, 1);
 
-	return best ? 0 : -EINVAL;
+	return gen_best && rec_best ? 0 : -EINVAL;
 }
 
 static void raid6_exit(void)
-- 
1.7.3.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] lib/raid6: update test program recovery functions
  2012-03-16 23:06 ` [PATCH 2/3] lib/raid6: update test program " Jim Kukunas
@ 2012-03-17  8:52   ` Paul Menzel
  2012-03-18  0:29     ` Jim Kukunas
  0 siblings, 1 reply; 7+ messages in thread
From: Paul Menzel @ 2012-03-17  8:52 UTC (permalink / raw)
  To: Jim Kukunas; +Cc: linux-raid, hpa

[-- Attachment #1: Type: text/plain, Size: 1008 bytes --]

Dear Jim,


Am Freitag, den 16.03.2012, 16:06 -0700 schrieb Jim Kukunas:
> Test each combination of recovery and syndrome generation
> functions.
> 
> Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
> ---
>  lib/raid6/algos.c     |    2 +-
>  lib/raid6/test/test.c |   32 +++++++++++++++++++++-----------
>  lib/raid6/x86.h       |   13 ++++++++-----
>  3 files changed, 30 insertions(+), 17 deletions(-)
> 
> diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
> index a3ac58a..82cdd01 100644
> --- a/lib/raid6/algos.c
> +++ b/lib/raid6/algos.c
> @@ -17,12 +17,12 @@
>   */
>  
>  #include <linux/raid/pq.h>
> -#include <linux/module.h>
>  #include "x86.h"
>  #ifndef __KERNEL__
>  #include <sys/mman.h>
>  #include <stdio.h>
>  #else
> +#include <linux/module.h>

that change is not mentioned in the commit message as far as I can see.

>  #include <linux/gfp.h>
>  #if !RAID6_USE_EMPTY_ZERO_PAGE
>  /* In .bss so it's zeroed */

[…]


Thanks,

Paul

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] lib/raid6: update test program recovery functions
  2012-03-17  8:52   ` Paul Menzel
@ 2012-03-18  0:29     ` Jim Kukunas
  2012-03-18  1:05       ` H. Peter Anvin
  0 siblings, 1 reply; 7+ messages in thread
From: Jim Kukunas @ 2012-03-18  0:29 UTC (permalink / raw)
  To: Paul Menzel; +Cc: linux-raid, hpa

On Sat, Mar 17, 2012 at 09:52:24AM +0100, Paul Menzel wrote:
> Dear Jim,

Hi Paul, 

> 
> 
> Am Freitag, den 16.03.2012, 16:06 -0700 schrieb Jim Kukunas:
> > Test each combination of recovery and syndrome generation
> > functions.
> > 
> > Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
> > ---
> >  lib/raid6/algos.c     |    2 +-
> >  lib/raid6/test/test.c |   32 +++++++++++++++++++++-----------
> >  lib/raid6/x86.h       |   13 ++++++++-----
> >  3 files changed, 30 insertions(+), 17 deletions(-)
> > 
> > diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
> > index a3ac58a..82cdd01 100644
> > --- a/lib/raid6/algos.c
> > +++ b/lib/raid6/algos.c
> > @@ -17,12 +17,12 @@
> >   */
> >  
> >  #include <linux/raid/pq.h>
> > -#include <linux/module.h>
> >  #include "x86.h"
> >  #ifndef __KERNEL__
> >  #include <sys/mman.h>
> >  #include <stdio.h>
> >  #else
> > +#include <linux/module.h>
> 
> that change is not mentioned in the commit message as far as I can see.
> 

The reason for this change is that <linux/module.h> drags in headers which
are not visible to userspace, thus breaking the build for the test program.

You are correct, it should be mentioned in the commit message.

Thanks, 

<snip>

-- 
Jim Kukunas
Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] lib/raid6: update test program recovery functions
  2012-03-18  0:29     ` Jim Kukunas
@ 2012-03-18  1:05       ` H. Peter Anvin
  0 siblings, 0 replies; 7+ messages in thread
From: H. Peter Anvin @ 2012-03-18  1:05 UTC (permalink / raw)
  To: Jim Kukunas; +Cc: Paul Menzel, linux-raid

On 03/17/2012 05:29 PM, Jim Kukunas wrote:
> 
> The reason for this change is that <linux/module.h> drags in headers which
> are not visible to userspace, thus breaking the build for the test program.
> 
> You are correct, it should be mentioned in the commit message.
> 

Sounds like it should be a separate commit, at the head of the series.

	-hpa


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2012-03-18  1:05 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-03-16 23:06 lib/raid6: SSSE3 optimized recovery functions Jim Kukunas
2012-03-16 23:06 ` [PATCH 1/3] lib/raid6: Add " Jim Kukunas
2012-03-16 23:06 ` [PATCH 2/3] lib/raid6: update test program " Jim Kukunas
2012-03-17  8:52   ` Paul Menzel
2012-03-18  0:29     ` Jim Kukunas
2012-03-18  1:05       ` H. Peter Anvin
2012-03-16 23:06 ` [PATCH 3/3] lib/raid6: cleanup gen_syndrome function selection Jim Kukunas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).