All of lore.kernel.org
 help / color / mirror / Atom feed
* lib/raid6: SSSE3 optimized recovery functions
@ 2012-03-16 23:06 Jim Kukunas
  2012-03-16 23:06 ` [PATCH 1/3] lib/raid6: Add " Jim Kukunas
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Jim Kukunas @ 2012-03-16 23:06 UTC (permalink / raw)
  To: linux-raid; +Cc: hpa

Hi Folks,

The following patchset adds SSSE3 optimized recovery
functions to RAID6.

A technical description of the algorithm can be found
at http://www.kernel.org/pub/linux/kernel/people/hpa/raid6.pdf

Thanks.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/3] lib/raid6: Add SSSE3 optimized recovery functions.
  2012-03-16 23:06 lib/raid6: SSSE3 optimized recovery functions Jim Kukunas
@ 2012-03-16 23:06 ` Jim Kukunas
  2012-03-16 23:06 ` [PATCH 2/3] lib/raid6: update test program " Jim Kukunas
  2012-03-16 23:06 ` [PATCH 3/3] lib/raid6: cleanup gen_syndrome function selection Jim Kukunas
  2 siblings, 0 replies; 7+ messages in thread
From: Jim Kukunas @ 2012-03-16 23:06 UTC (permalink / raw)
  To: linux-raid; +Cc: hpa

Add SSSE3 optimized recovery functions, as well as a system
for selecting the most appropriate recovery functions to use.

Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
Originally-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/raid/pq.h |   18 +++-
 lib/raid6/algos.c       |   38 ++++++
 lib/raid6/mktables.c    |   25 ++++
 lib/raid6/recov.c       |  341 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 416 insertions(+), 6 deletions(-)

diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 53272e9..640c69c 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -99,8 +99,20 @@ extern const struct raid6_calls raid6_altivec2;
 extern const struct raid6_calls raid6_altivec4;
 extern const struct raid6_calls raid6_altivec8;
 
+struct raid6_recov_calls {
+	void (*data2)(int, size_t, int, int, void **);
+	void (*datap)(int, size_t, int, void **);
+	int  (*valid)(void);
+	const char *name;
+	int priority;
+};
+
+extern const struct raid6_recov_calls raid6_recov_intx1;
+extern const struct raid6_recov_calls raid6_recov_ssse3;
+
 /* Algorithm list */
 extern const struct raid6_calls * const raid6_algos[];
+extern const struct raid6_recov_calls *const raid6_recov_algos[];
 int raid6_select_algo(void);
 
 /* Return values from chk_syndrome */
@@ -111,14 +123,16 @@ int raid6_select_algo(void);
 
 /* Galois field tables */
 extern const u8 raid6_gfmul[256][256] __attribute__((aligned(256)));
+extern const u8 raid6_vgfmul[256][32] __attribute__((aligned(256)));
 extern const u8 raid6_gfexp[256]      __attribute__((aligned(256)));
 extern const u8 raid6_gfinv[256]      __attribute__((aligned(256)));
 extern const u8 raid6_gfexi[256]      __attribute__((aligned(256)));
 
 /* Recovery routines */
-void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+extern void (*raid6_2data_recov)(int disks, size_t bytes, int faila, int failb,
 		       void **ptrs);
-void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs);
+extern void (*raid6_datap_recov)(int disks, size_t bytes, int faila,
+			void **ptrs);
 void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
 		      void **ptrs);
 
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 8b02f60..a3ac58a 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -18,6 +18,7 @@
 
 #include <linux/raid/pq.h>
 #include <linux/module.h>
+#include "x86.h"
 #ifndef __KERNEL__
 #include <sys/mman.h>
 #include <stdio.h>
@@ -64,6 +65,20 @@ const struct raid6_calls * const raid6_algos[] = {
 	NULL
 };
 
+void (*raid6_2data_recov)(int, size_t, int, int, void **);
+EXPORT_SYMBOL_GPL(raid6_2data_recov);
+
+void (*raid6_datap_recov)(int, size_t, int, void **);
+EXPORT_SYMBOL_GPL(raid6_datap_recov);
+
+const struct raid6_recov_calls *const raid6_recov_algos[] = {
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+	&raid6_recov_ssse3,
+#endif
+	&raid6_recov_intx1,
+	NULL
+};
+
 #ifdef __KERNEL__
 #define RAID6_TIME_JIFFIES_LG2	4
 #else
@@ -72,6 +87,26 @@ const struct raid6_calls * const raid6_algos[] = {
 #define time_before(x, y) ((x) < (y))
 #endif
 
+static inline void raid6_choose_recov(void)
+{
+	const struct raid6_recov_calls *const *algo;
+	const struct raid6_recov_calls *best;
+
+	for (best = NULL, algo = raid6_recov_algos; *algo; algo++)
+		if (!best || (*algo)->priority > best->priority)
+			if (!(*algo)->valid || (*algo)->valid())
+				best = *algo;
+
+	if (best) {
+		raid6_2data_recov = best->data2;
+		raid6_datap_recov = best->datap;
+
+		printk("raid6: using %s recovery algorithm\n", best->name);
+	} else
+		printk("raid6: Yikes! No recovery algorithm found!\n");
+}
+
+
 /* Try to pick the best algorithm */
 /* This code uses the gfmul table as convenient data set to abuse */
 
@@ -141,6 +176,9 @@ int __init raid6_select_algo(void)
 
 	free_pages((unsigned long)syndromes, 1);
 
+	/* select raid recover functions */
+	raid6_choose_recov();
+
 	return best ? 0 : -EINVAL;
 }
 
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
index 8a37809..39787db 100644
--- a/lib/raid6/mktables.c
+++ b/lib/raid6/mktables.c
@@ -81,6 +81,31 @@ int main(int argc, char *argv[])
 	printf("EXPORT_SYMBOL(raid6_gfmul);\n");
 	printf("#endif\n");
 
+	/* Compute vector multiplication table */
+	printf("\nconst u8  __attribute__((aligned(256)))\n"
+		"raid6_vgfmul[256][32] =\n"
+		"{\n");
+	for (i = 0; i < 256; i++) {
+		printf("\t{\n");
+		for (j = 0; j < 16; j += 8) {
+			printf("\t\t");
+			for (k = 0; k < 8; k++)
+				printf("0x%02x,%c", gfmul(i, j + k),
+				       (k == 7) ? '\n' : ' ');
+		}
+		for (j = 0; j < 16; j += 8) {
+			printf("\t\t");
+			for (k = 0; k < 8; k++)
+				printf("0x%02x,%c", gfmul(i, (j + k) << 4),
+				       (k == 7) ? '\n' : ' ');
+		}
+		printf("\t},\n");
+	}
+	printf("};\n");
+	printf("#ifdef __KERNEL__\n");
+	printf("EXPORT_SYMBOL(raid6_vgfmul);\n");
+	printf("#endif\n");
+
 	/* Compute power-of-2 table (exponent) */
 	v = 1;
 	printf("\nconst u8 __attribute__((aligned(256)))\n"
diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c
index fe275d7..ebaa307 100644
--- a/lib/raid6/recov.c
+++ b/lib/raid6/recov.c
@@ -20,9 +20,17 @@
 
 #include <linux/export.h>
 #include <linux/raid/pq.h>
+#include "x86.h"
+
+static int raid6_has_ssse3(void)
+{
+	return boot_cpu_has(X86_FEATURE_XMM) &&
+		boot_cpu_has(X86_FEATURE_XMM2) &&
+		boot_cpu_has(X86_FEATURE_SSSE3);
+}
 
 /* Recover two failed data blocks. */
-void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb,
 		       void **ptrs)
 {
 	u8 *p, *q, *dp, *dq;
@@ -64,10 +72,9 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
 		p++; q++;
 	}
 }
-EXPORT_SYMBOL_GPL(raid6_2data_recov);
 
 /* Recover failure of one data block plus the P block */
-void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
+void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs)
 {
 	u8 *p, *q, *dq;
 	const u8 *qmul;		/* Q multiplier table */
@@ -96,7 +103,333 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
 		q++; dq++;
 	}
 }
-EXPORT_SYMBOL_GPL(raid6_datap_recov);
+
+
+const struct raid6_recov_calls raid6_recov_intx1 = {
+	.data2 = raid6_2data_recov_intx1,
+	.datap = raid6_datap_recov_intx1,
+	.valid = NULL,
+	.name = "intx1",
+	.priority = 0,
+};
+
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+
+void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb,
+		       void **ptrs)
+{
+	u8 *p, *q, *dp, *dq;
+	const u8 *pbmul;	/* P multiplier table for B data */
+	const u8 *qmul;		/* Q multiplier table (for both) */
+	static const u8 __attribute__((aligned(16))) x0f[16] = {
+		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data pages
+	   Use the dead data pages as temporary storage for
+	   delta p and delta q */
+	dp = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-2] = dp;
+	dq = (u8 *)ptrs[failb];
+	ptrs[failb] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dp;
+	ptrs[failb]   = dq;
+	ptrs[disks-2] = p;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
+		raid6_gfexp[failb]]];
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0]));
+
+#ifdef CONFIG_X86_64
+	asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0]));
+	asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0]));
+	asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16]));
+#endif
+
+	/* Now do it... */
+	while (bytes) {
+#ifdef CONFIG_X86_64
+	/* xmm6, xmm14, xmm15 */
+
+		asm volatile("movdqa %0,%%xmm1" : : "m" (q[0]));
+		asm volatile("movdqa %0,%%xmm9" : : "m" (q[16]));
+		asm volatile("movdqa %0,%%xmm0" : : "m" (p[0]));
+		asm volatile("movdqa %0,%%xmm8" : : "m" (p[16]));
+		asm volatile("pxor   %0,%%xmm1" : : "m" (dq[0]));
+		asm volatile("pxor   %0,%%xmm9" : : "m" (dq[16]));
+		asm volatile("pxor   %0,%%xmm0" : : "m" (dp[0]));
+		asm volatile("pxor   %0,%%xmm8" : : "m" (dp[16]));
+
+		/* xmm0/8 = px */
+
+		asm volatile("movdqa %xmm6,%xmm4");
+		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));
+		asm volatile("movdqa %xmm6,%xmm12");
+		asm volatile("movdqa %0,%%xmm13" : : "m" (qmul[16]));
+		asm volatile("movdqa %xmm1,%xmm3");
+		asm volatile("movdqa %xmm9,%xmm11");
+		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */
+		asm volatile("movdqa %xmm8,%xmm10");
+		asm volatile("psraw  $4,%xmm1");
+		asm volatile("psraw  $4,%xmm9");
+		asm volatile("pand   %xmm7,%xmm3");
+		asm volatile("pand   %xmm7,%xmm11");
+		asm volatile("pand   %xmm7,%xmm1");
+		asm volatile("pand   %xmm7,%xmm9");
+		asm volatile("pshufb %xmm3,%xmm4");
+		asm volatile("pshufb %xmm11,%xmm12");
+		asm volatile("pshufb %xmm1,%xmm5");
+		asm volatile("pshufb %xmm9,%xmm13");
+		asm volatile("pxor   %xmm4,%xmm5");
+		asm volatile("pxor   %xmm12,%xmm13");
+
+		/* xmm5/13 = qx */
+
+		asm volatile("movdqa %xmm14,%xmm4");
+		asm volatile("movdqa %xmm15,%xmm1");
+		asm volatile("movdqa %xmm14,%xmm12");
+		asm volatile("movdqa %xmm15,%xmm9");
+		asm volatile("movdqa %xmm2,%xmm3");
+		asm volatile("movdqa %xmm10,%xmm11");
+		asm volatile("psraw  $4,%xmm2");
+		asm volatile("psraw  $4,%xmm10");
+		asm volatile("pand   %xmm7,%xmm3");
+		asm volatile("pand   %xmm7,%xmm11");
+		asm volatile("pand   %xmm7,%xmm2");
+		asm volatile("pand   %xmm7,%xmm10");
+		asm volatile("pshufb %xmm3,%xmm4");
+		asm volatile("pshufb %xmm11,%xmm12");
+		asm volatile("pshufb %xmm2,%xmm1");
+		asm volatile("pshufb %xmm10,%xmm9");
+		asm volatile("pxor   %xmm4,%xmm1");
+		asm volatile("pxor   %xmm12,%xmm9");
+
+		/* xmm1/9 = pbmul[px] */
+		asm volatile("pxor   %xmm5,%xmm1");
+		asm volatile("pxor   %xmm13,%xmm9");
+		/* xmm1/9 = db = DQ */
+		asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0]));
+		asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16]));
+
+		asm volatile("pxor   %xmm1,%xmm0");
+		asm volatile("pxor   %xmm9,%xmm8");
+		asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0]));
+		asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16]));
+
+		bytes -= 32;
+		p += 32;
+		q += 32;
+		dp += 32;
+		dq += 32;
+#else
+		asm volatile("movdqa %0,%%xmm1" : : "m" (*q));
+		asm volatile("movdqa %0,%%xmm0" : : "m" (*p));
+		asm volatile("pxor   %0,%%xmm1" : : "m" (*dq));
+		asm volatile("pxor   %0,%%xmm0" : : "m" (*dp));
+
+		/* 1 = dq ^ q
+		 * 0 = dp ^ p
+		 */
+		asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0]));
+		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));
+
+		asm volatile("movdqa %xmm1,%xmm3");
+		asm volatile("psraw  $4,%xmm1");
+		asm volatile("pand   %xmm7,%xmm3");
+		asm volatile("pand   %xmm7,%xmm1");
+		asm volatile("pshufb %xmm3,%xmm4");
+		asm volatile("pshufb %xmm1,%xmm5");
+		asm volatile("pxor   %xmm4,%xmm5");
+
+		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */
+
+		/* xmm5 = qx */
+
+		asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0]));
+		asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16]));
+		asm volatile("movdqa %xmm2,%xmm3");
+		asm volatile("psraw  $4,%xmm2");
+		asm volatile("pand   %xmm7,%xmm3");
+		asm volatile("pand   %xmm7,%xmm2");
+		asm volatile("pshufb %xmm3,%xmm4");
+		asm volatile("pshufb %xmm2,%xmm1");
+		asm volatile("pxor   %xmm4,%xmm1");
+
+		/* xmm1 = pbmul[px] */
+		asm volatile("pxor   %xmm5,%xmm1");
+		/* xmm1 = db = DQ */
+		asm volatile("movdqa %%xmm1,%0" : "=m" (*dq));
+
+		asm volatile("pxor   %xmm1,%xmm0");
+		asm volatile("movdqa %%xmm0,%0" : "=m" (*dp));
+
+		bytes -= 16;
+		p += 16;
+		q += 16;
+		dp += 16;
+		dq += 16;
+#endif
+	}
+
+	kernel_fpu_end();
+}
+
+
+void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, void **ptrs)
+{
+	u8 *p, *q, *dq;
+	const u8 *qmul;		/* Q multiplier table */
+	static const u8 __attribute__((aligned(16))) x0f[16] = {
+		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data page
+	   Use the dead data page as temporary storage for delta q */
+	dq = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dq;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+	kernel_fpu_begin();
+
+	asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0]));
+
+	while (bytes) {
+#ifdef CONFIG_X86_64
+		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
+		asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16]));
+		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
+		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));
+
+		/* xmm3 = q[0] ^ dq[0] */
+
+		asm volatile("pxor %0, %%xmm4" : : "m" (q[16]));
+		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));
+
+		/* xmm4 = q[16] ^ dq[16] */
+
+		asm volatile("movdqa %xmm3, %xmm6");
+		asm volatile("movdqa %xmm4, %xmm8");
+
+		/* xmm4 = xmm8 = q[16] ^ dq[16] */
+
+		asm volatile("psraw $4, %xmm3");
+		asm volatile("pand %xmm7, %xmm6");
+		asm volatile("pand %xmm7, %xmm3");
+		asm volatile("pshufb %xmm6, %xmm0");
+		asm volatile("pshufb %xmm3, %xmm1");
+		asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0]));
+		asm volatile("pxor %xmm0, %xmm1");
+		asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16]));
+
+		/* xmm1 = qmul[q[0] ^ dq[0]] */
+
+		asm volatile("psraw $4, %xmm4");
+		asm volatile("pand %xmm7, %xmm8");
+		asm volatile("pand %xmm7, %xmm4");
+		asm volatile("pshufb %xmm8, %xmm10");
+		asm volatile("pshufb %xmm4, %xmm11");
+		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
+		asm volatile("pxor %xmm10, %xmm11");
+		asm volatile("movdqa %0, %%xmm12" : : "m" (p[16]));
+
+		/* xmm11 = qmul[q[16] ^ dq[16]] */
+
+		asm volatile("pxor %xmm1, %xmm2");
+
+		/* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */
+
+		asm volatile("pxor %xmm11, %xmm12");
+
+		/* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */
+
+		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
+		asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16]));
+
+		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));
+		asm volatile("movdqa %%xmm12, %0" : "=m" (p[16]));
+
+		bytes -= 32;
+		p += 32;
+		q += 32;
+		dq += 32;
+
+#else
+		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
+		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));
+		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
+		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));
+
+		/* xmm3 = *q ^ *dq */
+
+		asm volatile("movdqa %xmm3, %xmm6");
+		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
+		asm volatile("psraw $4, %xmm3");
+		asm volatile("pand %xmm7, %xmm6");
+		asm volatile("pand %xmm7, %xmm3");
+		asm volatile("pshufb %xmm6, %xmm0");
+		asm volatile("pshufb %xmm3, %xmm1");
+		asm volatile("pxor %xmm0, %xmm1");
+
+		/* xmm1 = qmul[*q ^ *dq] */
+
+		asm volatile("pxor %xmm1, %xmm2");
+
+		/* xmm2 = *p ^ qmul[*q ^ *dq] */
+
+		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
+		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));
+
+		bytes -= 16;
+		p += 16;
+		q += 16;
+		dq += 16;
+#endif
+	}
+
+	kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_ssse3 = {
+	.data2 = raid6_2data_recov_ssse3,
+	.datap = raid6_datap_recov_ssse3,
+	.valid = raid6_has_ssse3,
+#ifdef CONFIG_X86_64
+	.name = "ssse3x2",
+#else
+	.name = "ssse3x1",
+#endif
+	.priority = 1,
+};
+
+#endif
+
 
 #ifndef __KERNEL__
 /* Testing only */
-- 
1.7.3.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] lib/raid6: update test program recovery functions
  2012-03-16 23:06 lib/raid6: SSSE3 optimized recovery functions Jim Kukunas
  2012-03-16 23:06 ` [PATCH 1/3] lib/raid6: Add " Jim Kukunas
@ 2012-03-16 23:06 ` Jim Kukunas
  2012-03-17  8:52   ` Paul Menzel
  2012-03-16 23:06 ` [PATCH 3/3] lib/raid6: cleanup gen_syndrome function selection Jim Kukunas
  2 siblings, 1 reply; 7+ messages in thread
From: Jim Kukunas @ 2012-03-16 23:06 UTC (permalink / raw)
  To: linux-raid; +Cc: hpa

Test each combination of recovery and syndrome generation
functions.

Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
---
 lib/raid6/algos.c     |    2 +-
 lib/raid6/test/test.c |   32 +++++++++++++++++++++-----------
 lib/raid6/x86.h       |   13 ++++++++-----
 3 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index a3ac58a..82cdd01 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -17,12 +17,12 @@
  */
 
 #include <linux/raid/pq.h>
-#include <linux/module.h>
 #include "x86.h"
 #ifndef __KERNEL__
 #include <sys/mman.h>
 #include <stdio.h>
 #else
+#include <linux/module.h>
 #include <linux/gfp.h>
 #if !RAID6_USE_EMPTY_ZERO_PAGE
 /* In .bss so it's zeroed */
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
index 7a93031..5a485b7 100644
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -90,25 +90,35 @@ static int test_disks(int i, int j)
 int main(int argc, char *argv[])
 {
 	const struct raid6_calls *const *algo;
+	const struct raid6_recov_calls *const *ra;
 	int i, j;
 	int err = 0;
 
 	makedata();
 
-	for (algo = raid6_algos; *algo; algo++) {
-		if (!(*algo)->valid || (*algo)->valid()) {
-			raid6_call = **algo;
+	for (ra = raid6_recov_algos; *ra; ra++) {
+		if ((*ra)->valid  && !(*ra)->valid())
+			continue;
+		raid6_2data_recov = (*ra)->data2;
+		raid6_datap_recov = (*ra)->datap;
 
-			/* Nuke syndromes */
-			memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+		printf("using recovery %s\n", (*ra)->name);
 
-			/* Generate assumed good syndrome */
-			raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
-						(void **)&dataptrs);
+		for (algo = raid6_algos; *algo; algo++) {
+			if (!(*algo)->valid || (*algo)->valid()) {
+				raid6_call = **algo;
 
-			for (i = 0; i < NDISKS-1; i++)
-				for (j = i+1; j < NDISKS; j++)
-					err += test_disks(i, j);
+				/* Nuke syndromes */
+				memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+				/* Generate assumed good syndrome */
+				raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+							(void **)&dataptrs);
+
+				for (i = 0; i < NDISKS-1; i++)
+					for (j = i+1; j < NDISKS; j++)
+						err += test_disks(i, j);
+			}
 		}
 		printf("\n");
 	}
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
index cb2a8c9..9aff51f 100644
--- a/lib/raid6/x86.h
+++ b/lib/raid6/x86.h
@@ -40,19 +40,22 @@ static inline void kernel_fpu_end(void)
 					   * (fast save and restore) */
 #define X86_FEATURE_XMM		(0*32+25) /* Streaming SIMD Extensions */
 #define X86_FEATURE_XMM2	(0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_SSSE3	(4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_AVX	(4*32+28) /* Advanced Vector Extensions */
 #define X86_FEATURE_MMXEXT	(1*32+22) /* AMD MMX extensions */
 
 /* Should work well enough on modern CPUs for testing */
 static inline int boot_cpu_has(int flag)
 {
-	u32 eax = (flag >> 5) ? 0x80000001 : 1;
-	u32 edx;
+	u32 eax = (flag & 0x20) ? 0x80000001 : 1;
+	u32 ecx, edx;
 
 	asm volatile("cpuid"
-		     : "+a" (eax), "=d" (edx)
-		     : : "ecx", "ebx");
+		     : "+a" (eax), "=d" (edx), "=c" (ecx)
+		     : : "ebx");
 
-	return (edx >> (flag & 31)) & 1;
+	return ((flag & 0x80 ? ecx : edx) >> (flag & 31)) & 1;
 }
 
 #endif /* ndef __KERNEL__ */
-- 
1.7.3.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] lib/raid6: cleanup gen_syndrome function selection
  2012-03-16 23:06 lib/raid6: SSSE3 optimized recovery functions Jim Kukunas
  2012-03-16 23:06 ` [PATCH 1/3] lib/raid6: Add " Jim Kukunas
  2012-03-16 23:06 ` [PATCH 2/3] lib/raid6: update test program " Jim Kukunas
@ 2012-03-16 23:06 ` Jim Kukunas
  2 siblings, 0 replies; 7+ messages in thread
From: Jim Kukunas @ 2012-03-16 23:06 UTC (permalink / raw)
  To: linux-raid; +Cc: hpa

Reorders the functions in raid6_algos, as well as the preference check,
to reduce the number of functions tested on initialization.

Also, creates symmetry between choosing the gen_syndrome functions
and choosing the recovery functions.

Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
---
 lib/raid6/algos.c |  104 +++++++++++++++++++++++++++++------------------------
 1 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 82cdd01..f035942 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -35,10 +35,6 @@ struct raid6_calls raid6_call;
 EXPORT_SYMBOL_GPL(raid6_call);
 
 const struct raid6_calls * const raid6_algos[] = {
-	&raid6_intx1,
-	&raid6_intx2,
-	&raid6_intx4,
-	&raid6_intx8,
 #if defined(__ia64__)
 	&raid6_intx16,
 	&raid6_intx32,
@@ -62,6 +58,10 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_altivec4,
 	&raid6_altivec8,
 #endif
+	&raid6_intx1,
+	&raid6_intx2,
+	&raid6_intx4,
+	&raid6_intx8,
 	NULL
 };
 
@@ -87,7 +87,7 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #define time_before(x, y) ((x) < (y))
 #endif
 
-static inline void raid6_choose_recov(void)
+static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 {
 	const struct raid6_recov_calls *const *algo;
 	const struct raid6_recov_calls *best;
@@ -104,62 +104,38 @@ static inline void raid6_choose_recov(void)
 		printk("raid6: using %s recovery algorithm\n", best->name);
 	} else
 		printk("raid6: Yikes! No recovery algorithm found!\n");
-}
-
 
-/* Try to pick the best algorithm */
-/* This code uses the gfmul table as convenient data set to abuse */
+	return best;
+}
 
-int __init raid6_select_algo(void)
+static inline const struct raid6_calls *raid6_choose_gen(
+	void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
 {
-	const struct raid6_calls * const * algo;
-	const struct raid6_calls * best;
-	char *syndromes;
-	void *dptrs[(65536/PAGE_SIZE)+2];
-	int i, disks;
-	unsigned long perf, bestperf;
-	int bestprefer;
-	unsigned long j0, j1;
-
-	disks = (65536/PAGE_SIZE)+2;
-	for ( i = 0 ; i < disks-2 ; i++ ) {
-		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
-	}
-
-	/* Normal code - use a 2-page allocation to avoid D$ conflict */
-	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
-
-	if ( !syndromes ) {
-		printk("raid6: Yikes!  No memory available.\n");
-		return -ENOMEM;
-	}
-
-	dptrs[disks-2] = syndromes;
-	dptrs[disks-1] = syndromes + PAGE_SIZE;
+	unsigned long perf, bestperf, j0, j1;
+	const struct raid6_calls *const *algo;
+	const struct raid6_calls *best;
 
-	bestperf = 0;  bestprefer = 0;  best = NULL;
+	for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+		if (!best || (*algo)->prefer >= best->prefer) {
+			if ((*algo)->valid && !(*algo)->valid())
+				continue;
 
-	for ( algo = raid6_algos ; *algo ; algo++ ) {
-		if ( !(*algo)->valid || (*algo)->valid() ) {
 			perf = 0;
 
 			preempt_disable();
 			j0 = jiffies;
-			while ( (j1 = jiffies) == j0 )
+			while ((j1 = jiffies) == j0)
 				cpu_relax();
 			while (time_before(jiffies,
 					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
-				(*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
+				(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
 				perf++;
 			}
 			preempt_enable();
 
-			if ( (*algo)->prefer > bestprefer ||
-			     ((*algo)->prefer == bestprefer &&
-			      perf > bestperf) ) {
-				best = *algo;
-				bestprefer = best->prefer;
+			if (perf > bestperf) {
 				bestperf = perf;
+				best = *algo;
 			}
 			printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,
 			       (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
@@ -174,12 +150,46 @@ int __init raid6_select_algo(void)
 	} else
 		printk("raid6: Yikes!  No algorithm found!\n");
 
-	free_pages((unsigned long)syndromes, 1);
+	return best;
+}
+
+
+/* Try to pick the best algorithm */
+/* This code uses the gfmul table as convenient data set to abuse */
+
+int __init raid6_select_algo(void)
+{
+	const int disks = (65536/PAGE_SIZE)+2;
+
+	const struct raid6_calls *gen_best;
+	const struct raid6_recov_calls *rec_best;
+	char *syndromes;
+	void *dptrs[(65536/PAGE_SIZE)+2];
+	int i;
+
+	for (i = 0; i < disks-2; i++)
+		dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
+
+	/* Normal code - use a 2-page allocation to avoid D$ conflict */
+	syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
+
+	if (!syndromes) {
+		printk("raid6: Yikes!  No memory available.\n");
+		return -ENOMEM;
+	}
+
+	dptrs[disks-2] = syndromes;
+	dptrs[disks-1] = syndromes + PAGE_SIZE;
+
+	/* select raid gen_syndrome function */
+	gen_best = raid6_choose_gen(&dptrs, disks);
 
 	/* select raid recover functions */
-	raid6_choose_recov();
+	rec_best = raid6_choose_recov();
+
+	free_pages((unsigned long)syndromes, 1);
 
-	return best ? 0 : -EINVAL;
+	return gen_best && rec_best ? 0 : -EINVAL;
 }
 
 static void raid6_exit(void)
-- 
1.7.3.4


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] lib/raid6: update test program recovery functions
  2012-03-16 23:06 ` [PATCH 2/3] lib/raid6: update test program " Jim Kukunas
@ 2012-03-17  8:52   ` Paul Menzel
  2012-03-18  0:29     ` Jim Kukunas
  0 siblings, 1 reply; 7+ messages in thread
From: Paul Menzel @ 2012-03-17  8:52 UTC (permalink / raw)
  To: Jim Kukunas; +Cc: linux-raid, hpa

[-- Attachment #1: Type: text/plain, Size: 1008 bytes --]

Dear Jim,


Am Freitag, den 16.03.2012, 16:06 -0700 schrieb Jim Kukunas:
> Test each combination of recovery and syndrome generation
> functions.
> 
> Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
> ---
>  lib/raid6/algos.c     |    2 +-
>  lib/raid6/test/test.c |   32 +++++++++++++++++++++-----------
>  lib/raid6/x86.h       |   13 ++++++++-----
>  3 files changed, 30 insertions(+), 17 deletions(-)
> 
> diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
> index a3ac58a..82cdd01 100644
> --- a/lib/raid6/algos.c
> +++ b/lib/raid6/algos.c
> @@ -17,12 +17,12 @@
>   */
>  
>  #include <linux/raid/pq.h>
> -#include <linux/module.h>
>  #include "x86.h"
>  #ifndef __KERNEL__
>  #include <sys/mman.h>
>  #include <stdio.h>
>  #else
> +#include <linux/module.h>

that change is not mentioned in the commit message as far as I can see.

>  #include <linux/gfp.h>
>  #if !RAID6_USE_EMPTY_ZERO_PAGE
>  /* In .bss so it's zeroed */

[…]


Thanks,

Paul

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] lib/raid6: update test program recovery functions
  2012-03-17  8:52   ` Paul Menzel
@ 2012-03-18  0:29     ` Jim Kukunas
  2012-03-18  1:05       ` H. Peter Anvin
  0 siblings, 1 reply; 7+ messages in thread
From: Jim Kukunas @ 2012-03-18  0:29 UTC (permalink / raw)
  To: Paul Menzel; +Cc: linux-raid, hpa

On Sat, Mar 17, 2012 at 09:52:24AM +0100, Paul Menzel wrote:
> Dear Jim,

Hi Paul, 

> 
> 
> Am Freitag, den 16.03.2012, 16:06 -0700 schrieb Jim Kukunas:
> > Test each combination of recovery and syndrome generation
> > functions.
> > 
> > Signed-off-by: Jim Kukunas <james.t.kukunas@linux.intel.com>
> > ---
> >  lib/raid6/algos.c     |    2 +-
> >  lib/raid6/test/test.c |   32 +++++++++++++++++++++-----------
> >  lib/raid6/x86.h       |   13 ++++++++-----
> >  3 files changed, 30 insertions(+), 17 deletions(-)
> > 
> > diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
> > index a3ac58a..82cdd01 100644
> > --- a/lib/raid6/algos.c
> > +++ b/lib/raid6/algos.c
> > @@ -17,12 +17,12 @@
> >   */
> >  
> >  #include <linux/raid/pq.h>
> > -#include <linux/module.h>
> >  #include "x86.h"
> >  #ifndef __KERNEL__
> >  #include <sys/mman.h>
> >  #include <stdio.h>
> >  #else
> > +#include <linux/module.h>
> 
> that change is not mentioned in the commit message as far as I can see.
> 

The reason for this change is that <linux/module.h> drags in headers which
are not visible to userspace, thus breaking the build for the test program.

You are correct, it should be mentioned in the commit message.

Thanks, 

<snip>

-- 
Jim Kukunas
Intel Open Source Technology Center

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] lib/raid6: update test program recovery functions
  2012-03-18  0:29     ` Jim Kukunas
@ 2012-03-18  1:05       ` H. Peter Anvin
  0 siblings, 0 replies; 7+ messages in thread
From: H. Peter Anvin @ 2012-03-18  1:05 UTC (permalink / raw)
  To: Jim Kukunas; +Cc: Paul Menzel, linux-raid

On 03/17/2012 05:29 PM, Jim Kukunas wrote:
> 
> The reason for this change is that <linux/module.h> drags in headers which
> are not visible to userspace, thus breaking the build for the test program.
> 
> You are correct, it should be mentioned in the commit message.
> 

Sounds like it should be a separate commit, at the head of the series.

	-hpa


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2012-03-18  1:05 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-03-16 23:06 lib/raid6: SSSE3 optimized recovery functions Jim Kukunas
2012-03-16 23:06 ` [PATCH 1/3] lib/raid6: Add " Jim Kukunas
2012-03-16 23:06 ` [PATCH 2/3] lib/raid6: update test program " Jim Kukunas
2012-03-17  8:52   ` Paul Menzel
2012-03-18  0:29     ` Jim Kukunas
2012-03-18  1:05       ` H. Peter Anvin
2012-03-16 23:06 ` [PATCH 3/3] lib/raid6: cleanup gen_syndrome function selection Jim Kukunas

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.