public inbox for mm-commits@vger.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@linux-foundation.org>
To: mm-commits@vger.kernel.org,will@kernel.org,tytso@mit.edu,svens@linux.ibm.com,song@kernel.org,richard@nod.at,richard.henderson@linaro.org,palmer@dabbelt.com,npiggin@gmail.com,mpe@ellerman.id.au,mingo@redhat.com,mattst88@gmail.com,maddy@linux.ibm.com,linux@armlinux.org.uk,linmag7@gmail.com,linan122@huawei.com,kernel@xen0n.name,johannes@sipsolutions.net,jason@zx2c4.com,hpa@zytor.com,herbert@gondor.apana.org.au,hca@linux.ibm.com,gor@linux.ibm.com,ebiggers@kernel.org,dsterba@suse.com,davem@davemloft.net,dan.j.williams@intel.com,clm@fb.com,chenhuacai@kernel.org,catalin.marinas@arm.com,bp@alien8.de,borntraeger@linux.ibm.com,arnd@arndb.de,ardb@kernel.org,aou@eecs.berkeley.edu,anton.ivanov@cambridgegreys.com,andreas@gaisler.com,alex@ghiti.fr,agordeev@linux.ibm.com,hch@lst.de,akpm@linux-foundation.org
Subject: [merged mm-nonmm-stable] loongarch-move-the-xor-code-to-lib-raid.patch removed from -mm tree
Date: Thu, 02 Apr 2026 23:41:52 -0700	[thread overview]
Message-ID: <20260403064152.87C68C4CEF7@smtp.kernel.org> (raw)


The quilt patch titled
     Subject: loongarch: move the XOR code to lib/raid/
has been removed from the -mm tree.  Its filename was
     loongarch-move-the-xor-code-to-lib-raid.patch

This patch was dropped because it was merged into the mm-nonmm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

------------------------------------------------------
From: Christoph Hellwig <hch@lst.de>
Subject: loongarch: move the XOR code to lib/raid/
Date: Fri, 27 Mar 2026 07:16:46 +0100

Move the optimized XOR into lib/raid and include it in xor.ko instead
of always building it into the main kernel image.

Link: https://lkml.kernel.org/r/20260327061704.3707577-15-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Eric Biggers <ebiggers@kernel.org>
Tested-by: Eric Biggers <ebiggers@kernel.org>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexandre Ghiti <alex@ghiti.fr>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Borislav Petkov (AMD)" <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Mason <clm@fb.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: David Sterba <dsterba@suse.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason A. Donenfeld <jason@zx2c4.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Li Nan <linan122@huawei.com>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Magnus Lindholm <linmag7@gmail.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Song Liu <song@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 arch/loongarch/include/asm/xor.h       |   24 -----
 arch/loongarch/include/asm/xor_simd.h  |   34 -------
 arch/loongarch/lib/Makefile            |    2 
 arch/loongarch/lib/xor_simd.c          |   93 -------------------
 arch/loongarch/lib/xor_simd.h          |   38 -------
 arch/loongarch/lib/xor_simd_glue.c     |   72 ---------------
 arch/loongarch/lib/xor_template.c      |  110 -----------------------
 lib/raid/xor/Makefile                  |    2 
 lib/raid/xor/loongarch/xor_simd.c      |   93 +++++++++++++++++++
 lib/raid/xor/loongarch/xor_simd.h      |   38 +++++++
 lib/raid/xor/loongarch/xor_simd_glue.c |   77 ++++++++++++++++
 lib/raid/xor/loongarch/xor_template.c  |  110 +++++++++++++++++++++++
 12 files changed, 323 insertions(+), 370 deletions(-)

--- a/arch/loongarch/include/asm/xor.h~loongarch-move-the-xor-code-to-lib-raid
+++ a/arch/loongarch/include/asm/xor.h
@@ -6,27 +6,6 @@
 #define _ASM_LOONGARCH_XOR_H
 
 #include <asm/cpu-features.h>
-#include <asm/xor_simd.h>
-
-#ifdef CONFIG_CPU_HAS_LSX
-static struct xor_block_template xor_block_lsx = {
-	.name = "lsx",
-	.do_2 = xor_lsx_2,
-	.do_3 = xor_lsx_3,
-	.do_4 = xor_lsx_4,
-	.do_5 = xor_lsx_5,
-};
-#endif /* CONFIG_CPU_HAS_LSX */
-
-#ifdef CONFIG_CPU_HAS_LASX
-static struct xor_block_template xor_block_lasx = {
-	.name = "lasx",
-	.do_2 = xor_lasx_2,
-	.do_3 = xor_lasx_3,
-	.do_4 = xor_lasx_4,
-	.do_5 = xor_lasx_5,
-};
-#endif /* CONFIG_CPU_HAS_LASX */
 
 /*
  * For grins, also test the generic routines.
@@ -38,6 +17,9 @@ static struct xor_block_template xor_blo
  */
 #include <asm-generic/xor.h>
 
+extern struct xor_block_template xor_block_lsx;
+extern struct xor_block_template xor_block_lasx;
+
 #define arch_xor_init arch_xor_init
 static __always_inline void __init arch_xor_init(void)
 {
diff --git a/arch/loongarch/include/asm/xor_simd.h a/arch/loongarch/include/asm/xor_simd.h
deleted file mode 100644
--- a/arch/loongarch/include/asm/xor_simd.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
- */
-#ifndef _ASM_LOONGARCH_XOR_SIMD_H
-#define _ASM_LOONGARCH_XOR_SIMD_H
-
-#ifdef CONFIG_CPU_HAS_LSX
-void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
-	       const unsigned long * __restrict p2);
-void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
-	       const unsigned long * __restrict p2, const unsigned long * __restrict p3);
-void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
-	       const unsigned long * __restrict p2, const unsigned long * __restrict p3,
-	       const unsigned long * __restrict p4);
-void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
-	       const unsigned long * __restrict p2, const unsigned long * __restrict p3,
-	       const unsigned long * __restrict p4, const unsigned long * __restrict p5);
-#endif /* CONFIG_CPU_HAS_LSX */
-
-#ifdef CONFIG_CPU_HAS_LASX
-void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
-	        const unsigned long * __restrict p2);
-void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
-	        const unsigned long * __restrict p2, const unsigned long * __restrict p3);
-void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
-	        const unsigned long * __restrict p2, const unsigned long * __restrict p3,
-	        const unsigned long * __restrict p4);
-void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
-	        const unsigned long * __restrict p2, const unsigned long * __restrict p3,
-	        const unsigned long * __restrict p4, const unsigned long * __restrict p5);
-#endif /* CONFIG_CPU_HAS_LASX */
-
-#endif /* _ASM_LOONGARCH_XOR_SIMD_H */
--- a/arch/loongarch/lib/Makefile~loongarch-move-the-xor-code-to-lib-raid
+++ a/arch/loongarch/lib/Makefile
@@ -8,6 +8,4 @@ lib-y	+= delay.o memset.o memcpy.o memmo
 
 obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o
 
-obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
-
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/loongarch/lib/xor_simd.c a/arch/loongarch/lib/xor_simd.c
deleted file mode 100644
--- a/arch/loongarch/lib/xor_simd.c
+++ /dev/null
@@ -1,93 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * LoongArch SIMD XOR operations
- *
- * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
- */
-
-#include "xor_simd.h"
-
-/*
- * Process one cache line (64 bytes) per loop. This is assuming all future
- * popular LoongArch cores are similar performance-characteristics-wise to the
- * current models.
- */
-#define LINE_WIDTH 64
-
-#ifdef CONFIG_CPU_HAS_LSX
-
-#define LD(reg, base, offset)	\
-	"vld $vr" #reg ", %[" #base "], " #offset "\n\t"
-#define ST(reg, base, offset)	\
-	"vst $vr" #reg ", %[" #base "], " #offset "\n\t"
-#define XOR(dj, k)	"vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
-
-#define LD_INOUT_LINE(base)	\
-	LD(0, base, 0)		\
-	LD(1, base, 16)		\
-	LD(2, base, 32)		\
-	LD(3, base, 48)
-
-#define LD_AND_XOR_LINE(base)	\
-	LD(4, base, 0)		\
-	LD(5, base, 16)		\
-	LD(6, base, 32)		\
-	LD(7, base, 48)		\
-	XOR(0, 4)		\
-	XOR(1, 5)		\
-	XOR(2, 6)		\
-	XOR(3, 7)
-
-#define ST_LINE(base)		\
-	ST(0, base, 0)		\
-	ST(1, base, 16)		\
-	ST(2, base, 32)		\
-	ST(3, base, 48)
-
-#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
-#include "xor_template.c"
-
-#undef LD
-#undef ST
-#undef XOR
-#undef LD_INOUT_LINE
-#undef LD_AND_XOR_LINE
-#undef ST_LINE
-#undef XOR_FUNC_NAME
-
-#endif /* CONFIG_CPU_HAS_LSX */
-
-#ifdef CONFIG_CPU_HAS_LASX
-
-#define LD(reg, base, offset)	\
-	"xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
-#define ST(reg, base, offset)	\
-	"xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
-#define XOR(dj, k)	"xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
-
-#define LD_INOUT_LINE(base)	\
-	LD(0, base, 0)		\
-	LD(1, base, 32)
-
-#define LD_AND_XOR_LINE(base)	\
-	LD(2, base, 0)		\
-	LD(3, base, 32)		\
-	XOR(0, 2)		\
-	XOR(1, 3)
-
-#define ST_LINE(base)		\
-	ST(0, base, 0)		\
-	ST(1, base, 32)
-
-#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
-#include "xor_template.c"
-
-#undef LD
-#undef ST
-#undef XOR
-#undef LD_INOUT_LINE
-#undef LD_AND_XOR_LINE
-#undef ST_LINE
-#undef XOR_FUNC_NAME
-
-#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/arch/loongarch/lib/xor_simd_glue.c a/arch/loongarch/lib/xor_simd_glue.c
deleted file mode 100644
--- a/arch/loongarch/lib/xor_simd_glue.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * LoongArch SIMD XOR operations
- *
- * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
- */
-
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <asm/fpu.h>
-#include <asm/xor_simd.h>
-#include "xor_simd.h"
-
-#define MAKE_XOR_GLUE_2(flavor)							\
-void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,	\
-		      const unsigned long * __restrict p2)			\
-{										\
-	kernel_fpu_begin();							\
-	__xor_##flavor##_2(bytes, p1, p2);					\
-	kernel_fpu_end();							\
-}										\
-EXPORT_SYMBOL_GPL(xor_##flavor##_2)
-
-#define MAKE_XOR_GLUE_3(flavor)							\
-void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,	\
-		      const unsigned long * __restrict p2,			\
-		      const unsigned long * __restrict p3)			\
-{										\
-	kernel_fpu_begin();							\
-	__xor_##flavor##_3(bytes, p1, p2, p3);					\
-	kernel_fpu_end();							\
-}										\
-EXPORT_SYMBOL_GPL(xor_##flavor##_3)
-
-#define MAKE_XOR_GLUE_4(flavor)							\
-void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,	\
-		      const unsigned long * __restrict p2,			\
-		      const unsigned long * __restrict p3,			\
-		      const unsigned long * __restrict p4)			\
-{										\
-	kernel_fpu_begin();							\
-	__xor_##flavor##_4(bytes, p1, p2, p3, p4);				\
-	kernel_fpu_end();							\
-}										\
-EXPORT_SYMBOL_GPL(xor_##flavor##_4)
-
-#define MAKE_XOR_GLUE_5(flavor)							\
-void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,	\
-		      const unsigned long * __restrict p2,			\
-		      const unsigned long * __restrict p3,			\
-		      const unsigned long * __restrict p4,			\
-		      const unsigned long * __restrict p5)			\
-{										\
-	kernel_fpu_begin();							\
-	__xor_##flavor##_5(bytes, p1, p2, p3, p4, p5);				\
-	kernel_fpu_end();							\
-}										\
-EXPORT_SYMBOL_GPL(xor_##flavor##_5)
-
-#define MAKE_XOR_GLUES(flavor)		\
-	MAKE_XOR_GLUE_2(flavor);	\
-	MAKE_XOR_GLUE_3(flavor);	\
-	MAKE_XOR_GLUE_4(flavor);	\
-	MAKE_XOR_GLUE_5(flavor)
-
-#ifdef CONFIG_CPU_HAS_LSX
-MAKE_XOR_GLUES(lsx);
-#endif
-
-#ifdef CONFIG_CPU_HAS_LASX
-MAKE_XOR_GLUES(lasx);
-#endif
diff --git a/arch/loongarch/lib/xor_simd.h a/arch/loongarch/lib/xor_simd.h
deleted file mode 100644
--- a/arch/loongarch/lib/xor_simd.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Simple interface to link xor_simd.c and xor_simd_glue.c
- *
- * Separating these files ensures that no SIMD instructions are run outside of
- * the kfpu critical section.
- */
-
-#ifndef __LOONGARCH_LIB_XOR_SIMD_H
-#define __LOONGARCH_LIB_XOR_SIMD_H
-
-#ifdef CONFIG_CPU_HAS_LSX
-void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
-		 const unsigned long * __restrict p2);
-void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
-		 const unsigned long * __restrict p2, const unsigned long * __restrict p3);
-void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
-		 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
-		 const unsigned long * __restrict p4);
-void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
-		 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
-		 const unsigned long * __restrict p4, const unsigned long * __restrict p5);
-#endif /* CONFIG_CPU_HAS_LSX */
-
-#ifdef CONFIG_CPU_HAS_LASX
-void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
-		  const unsigned long * __restrict p2);
-void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
-		  const unsigned long * __restrict p2, const unsigned long * __restrict p3);
-void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
-		  const unsigned long * __restrict p2, const unsigned long * __restrict p3,
-		  const unsigned long * __restrict p4);
-void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
-		  const unsigned long * __restrict p2, const unsigned long * __restrict p3,
-		  const unsigned long * __restrict p4, const unsigned long * __restrict p5);
-#endif /* CONFIG_CPU_HAS_LASX */
-
-#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
diff --git a/arch/loongarch/lib/xor_template.c a/arch/loongarch/lib/xor_template.c
deleted file mode 100644
--- a/arch/loongarch/lib/xor_template.c
+++ /dev/null
@@ -1,110 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
- *
- * Template for XOR operations, instantiated in xor_simd.c.
- *
- * Expected preprocessor definitions:
- *
- * - LINE_WIDTH
- * - XOR_FUNC_NAME(nr)
- * - LD_INOUT_LINE(buf)
- * - LD_AND_XOR_LINE(buf)
- * - ST_LINE(buf)
- */
-
-void XOR_FUNC_NAME(2)(unsigned long bytes,
-		      unsigned long * __restrict v1,
-		      const unsigned long * __restrict v2)
-{
-	unsigned long lines = bytes / LINE_WIDTH;
-
-	do {
-		__asm__ __volatile__ (
-			LD_INOUT_LINE(v1)
-			LD_AND_XOR_LINE(v2)
-			ST_LINE(v1)
-		: : [v1] "r"(v1), [v2] "r"(v2) : "memory"
-		);
-
-		v1 += LINE_WIDTH / sizeof(unsigned long);
-		v2 += LINE_WIDTH / sizeof(unsigned long);
-	} while (--lines > 0);
-}
-
-void XOR_FUNC_NAME(3)(unsigned long bytes,
-		      unsigned long * __restrict v1,
-		      const unsigned long * __restrict v2,
-		      const unsigned long * __restrict v3)
-{
-	unsigned long lines = bytes / LINE_WIDTH;
-
-	do {
-		__asm__ __volatile__ (
-			LD_INOUT_LINE(v1)
-			LD_AND_XOR_LINE(v2)
-			LD_AND_XOR_LINE(v3)
-			ST_LINE(v1)
-		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
-		);
-
-		v1 += LINE_WIDTH / sizeof(unsigned long);
-		v2 += LINE_WIDTH / sizeof(unsigned long);
-		v3 += LINE_WIDTH / sizeof(unsigned long);
-	} while (--lines > 0);
-}
-
-void XOR_FUNC_NAME(4)(unsigned long bytes,
-		      unsigned long * __restrict v1,
-		      const unsigned long * __restrict v2,
-		      const unsigned long * __restrict v3,
-		      const unsigned long * __restrict v4)
-{
-	unsigned long lines = bytes / LINE_WIDTH;
-
-	do {
-		__asm__ __volatile__ (
-			LD_INOUT_LINE(v1)
-			LD_AND_XOR_LINE(v2)
-			LD_AND_XOR_LINE(v3)
-			LD_AND_XOR_LINE(v4)
-			ST_LINE(v1)
-		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
-		: "memory"
-		);
-
-		v1 += LINE_WIDTH / sizeof(unsigned long);
-		v2 += LINE_WIDTH / sizeof(unsigned long);
-		v3 += LINE_WIDTH / sizeof(unsigned long);
-		v4 += LINE_WIDTH / sizeof(unsigned long);
-	} while (--lines > 0);
-}
-
-void XOR_FUNC_NAME(5)(unsigned long bytes,
-		      unsigned long * __restrict v1,
-		      const unsigned long * __restrict v2,
-		      const unsigned long * __restrict v3,
-		      const unsigned long * __restrict v4,
-		      const unsigned long * __restrict v5)
-{
-	unsigned long lines = bytes / LINE_WIDTH;
-
-	do {
-		__asm__ __volatile__ (
-			LD_INOUT_LINE(v1)
-			LD_AND_XOR_LINE(v2)
-			LD_AND_XOR_LINE(v3)
-			LD_AND_XOR_LINE(v4)
-			LD_AND_XOR_LINE(v5)
-			ST_LINE(v1)
-		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
-		    [v5] "r"(v5) : "memory"
-		);
-
-		v1 += LINE_WIDTH / sizeof(unsigned long);
-		v2 += LINE_WIDTH / sizeof(unsigned long);
-		v3 += LINE_WIDTH / sizeof(unsigned long);
-		v4 += LINE_WIDTH / sizeof(unsigned long);
-		v5 += LINE_WIDTH / sizeof(unsigned long);
-	} while (--lines > 0);
-}
diff --git a/lib/raid/xor/loongarch/xor_simd.c a/lib/raid/xor/loongarch/xor_simd.c
new file mode 100664
--- /dev/null
+++ a/lib/raid/xor/loongarch/xor_simd.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include "xor_simd.h"
+
+/*
+ * Process one cache line (64 bytes) per loop. This is assuming all future
+ * popular LoongArch cores are similar performance-characteristics-wise to the
+ * current models.
+ */
+#define LINE_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_LSX
+
+#define LD(reg, base, offset)	\
+	"vld $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset)	\
+	"vst $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k)	"vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
+
+#define LD_INOUT_LINE(base)	\
+	LD(0, base, 0)		\
+	LD(1, base, 16)		\
+	LD(2, base, 32)		\
+	LD(3, base, 48)
+
+#define LD_AND_XOR_LINE(base)	\
+	LD(4, base, 0)		\
+	LD(5, base, 16)		\
+	LD(6, base, 32)		\
+	LD(7, base, 48)		\
+	XOR(0, 4)		\
+	XOR(1, 5)		\
+	XOR(2, 6)		\
+	XOR(3, 7)
+
+#define ST_LINE(base)		\
+	ST(0, base, 0)		\
+	ST(1, base, 16)		\
+	ST(2, base, 32)		\
+	ST(3, base, 48)
+
+#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+
+#define LD(reg, base, offset)	\
+	"xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset)	\
+	"xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k)	"xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
+
+#define LD_INOUT_LINE(base)	\
+	LD(0, base, 0)		\
+	LD(1, base, 32)
+
+#define LD_AND_XOR_LINE(base)	\
+	LD(2, base, 0)		\
+	LD(3, base, 32)		\
+	XOR(0, 2)		\
+	XOR(1, 3)
+
+#define ST_LINE(base)		\
+	ST(0, base, 0)		\
+	ST(1, base, 32)
+
+#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/lib/raid/xor/loongarch/xor_simd_glue.c a/lib/raid/xor/loongarch/xor_simd_glue.c
new file mode 100664
--- /dev/null
+++ a/lib/raid/xor/loongarch/xor_simd_glue.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include <linux/sched.h>
+#include <linux/raid/xor_impl.h>
+#include <asm/fpu.h>
+#include <asm/xor.h>
+#include "xor_simd.h"
+
+#define MAKE_XOR_GLUE_2(flavor)							\
+static void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,\
+		      const unsigned long * __restrict p2)			\
+{										\
+	kernel_fpu_begin();							\
+	__xor_##flavor##_2(bytes, p1, p2);					\
+	kernel_fpu_end();							\
+}										\
+
+#define MAKE_XOR_GLUE_3(flavor)							\
+static void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,\
+		      const unsigned long * __restrict p2,			\
+		      const unsigned long * __restrict p3)			\
+{										\
+	kernel_fpu_begin();							\
+	__xor_##flavor##_3(bytes, p1, p2, p3);					\
+	kernel_fpu_end();							\
+}										\
+
+#define MAKE_XOR_GLUE_4(flavor)							\
+static void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,\
+		      const unsigned long * __restrict p2,			\
+		      const unsigned long * __restrict p3,			\
+		      const unsigned long * __restrict p4)			\
+{										\
+	kernel_fpu_begin();							\
+	__xor_##flavor##_4(bytes, p1, p2, p3, p4);				\
+	kernel_fpu_end();							\
+}										\
+
+#define MAKE_XOR_GLUE_5(flavor)							\
+static void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,\
+		      const unsigned long * __restrict p2,			\
+		      const unsigned long * __restrict p3,			\
+		      const unsigned long * __restrict p4,			\
+		      const unsigned long * __restrict p5)			\
+{										\
+	kernel_fpu_begin();							\
+	__xor_##flavor##_5(bytes, p1, p2, p3, p4, p5);				\
+	kernel_fpu_end();							\
+}										\
+
+#define MAKE_XOR_GLUES(flavor)				\
+	MAKE_XOR_GLUE_2(flavor);			\
+	MAKE_XOR_GLUE_3(flavor);			\
+	MAKE_XOR_GLUE_4(flavor);			\
+	MAKE_XOR_GLUE_5(flavor);			\
+							\
+struct xor_block_template xor_block_##flavor = {	\
+	.name = __stringify(flavor),			\
+	.do_2 = xor_##flavor##_2,			\
+	.do_3 = xor_##flavor##_3,			\
+	.do_4 = xor_##flavor##_4,			\
+	.do_5 = xor_##flavor##_5,			\
+}
+
+
+#ifdef CONFIG_CPU_HAS_LSX
+MAKE_XOR_GLUES(lsx);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+MAKE_XOR_GLUES(lasx);
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/lib/raid/xor/loongarch/xor_simd.h a/lib/raid/xor/loongarch/xor_simd.h
new file mode 100664
--- /dev/null
+++ a/lib/raid/xor/loongarch/xor_simd.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Simple interface to link xor_simd.c and xor_simd_glue.c
+ *
+ * Separating these files ensures that no SIMD instructions are run outside of
+ * the kfpu critical section.
+ */
+
+#ifndef __LOONGARCH_LIB_XOR_SIMD_H
+#define __LOONGARCH_LIB_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+		 const unsigned long * __restrict p2);
+void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+		 const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+		 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+		 const unsigned long * __restrict p4);
+void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+		 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+		 const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+		  const unsigned long * __restrict p2);
+void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+		  const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+		  const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+		  const unsigned long * __restrict p4);
+void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+		  const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+		  const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
diff --git a/lib/raid/xor/loongarch/xor_template.c a/lib/raid/xor/loongarch/xor_template.c
new file mode 100664
--- /dev/null
+++ a/lib/raid/xor/loongarch/xor_template.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Template for XOR operations, instantiated in xor_simd.c.
+ *
+ * Expected preprocessor definitions:
+ *
+ * - LINE_WIDTH
+ * - XOR_FUNC_NAME(nr)
+ * - LD_INOUT_LINE(buf)
+ * - LD_AND_XOR_LINE(buf)
+ * - ST_LINE(buf)
+ */
+
+void XOR_FUNC_NAME(2)(unsigned long bytes,
+		      unsigned long * __restrict v1,
+		      const unsigned long * __restrict v2)
+{
+	unsigned long lines = bytes / LINE_WIDTH;
+
+	do {
+		__asm__ __volatile__ (
+			LD_INOUT_LINE(v1)
+			LD_AND_XOR_LINE(v2)
+			ST_LINE(v1)
+		: : [v1] "r"(v1), [v2] "r"(v2) : "memory"
+		);
+
+		v1 += LINE_WIDTH / sizeof(unsigned long);
+		v2 += LINE_WIDTH / sizeof(unsigned long);
+	} while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(3)(unsigned long bytes,
+		      unsigned long * __restrict v1,
+		      const unsigned long * __restrict v2,
+		      const unsigned long * __restrict v3)
+{
+	unsigned long lines = bytes / LINE_WIDTH;
+
+	do {
+		__asm__ __volatile__ (
+			LD_INOUT_LINE(v1)
+			LD_AND_XOR_LINE(v2)
+			LD_AND_XOR_LINE(v3)
+			ST_LINE(v1)
+		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
+		);
+
+		v1 += LINE_WIDTH / sizeof(unsigned long);
+		v2 += LINE_WIDTH / sizeof(unsigned long);
+		v3 += LINE_WIDTH / sizeof(unsigned long);
+	} while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(4)(unsigned long bytes,
+		      unsigned long * __restrict v1,
+		      const unsigned long * __restrict v2,
+		      const unsigned long * __restrict v3,
+		      const unsigned long * __restrict v4)
+{
+	unsigned long lines = bytes / LINE_WIDTH;
+
+	do {
+		__asm__ __volatile__ (
+			LD_INOUT_LINE(v1)
+			LD_AND_XOR_LINE(v2)
+			LD_AND_XOR_LINE(v3)
+			LD_AND_XOR_LINE(v4)
+			ST_LINE(v1)
+		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
+		: "memory"
+		);
+
+		v1 += LINE_WIDTH / sizeof(unsigned long);
+		v2 += LINE_WIDTH / sizeof(unsigned long);
+		v3 += LINE_WIDTH / sizeof(unsigned long);
+		v4 += LINE_WIDTH / sizeof(unsigned long);
+	} while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(5)(unsigned long bytes,
+		      unsigned long * __restrict v1,
+		      const unsigned long * __restrict v2,
+		      const unsigned long * __restrict v3,
+		      const unsigned long * __restrict v4,
+		      const unsigned long * __restrict v5)
+{
+	unsigned long lines = bytes / LINE_WIDTH;
+
+	do {
+		__asm__ __volatile__ (
+			LD_INOUT_LINE(v1)
+			LD_AND_XOR_LINE(v2)
+			LD_AND_XOR_LINE(v3)
+			LD_AND_XOR_LINE(v4)
+			LD_AND_XOR_LINE(v5)
+			ST_LINE(v1)
+		: : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
+		    [v5] "r"(v5) : "memory"
+		);
+
+		v1 += LINE_WIDTH / sizeof(unsigned long);
+		v2 += LINE_WIDTH / sizeof(unsigned long);
+		v3 += LINE_WIDTH / sizeof(unsigned long);
+		v4 += LINE_WIDTH / sizeof(unsigned long);
+		v5 += LINE_WIDTH / sizeof(unsigned long);
+	} while (--lines > 0);
+}
--- a/lib/raid/xor/Makefile~loongarch-move-the-xor-code-to-lib-raid
+++ a/lib/raid/xor/Makefile
@@ -14,6 +14,8 @@ ifeq ($(CONFIG_ARM),y)
 xor-$(CONFIG_KERNEL_MODE_NEON)	+= arm/xor-neon.o arm/xor-neon-glue.o
 endif
 xor-$(CONFIG_ARM64)		+= arm64/xor-neon.o arm64/xor-neon-glue.o
+xor-$(CONFIG_CPU_HAS_LSX)	+= loongarch/xor_simd.o
+xor-$(CONFIG_CPU_HAS_LSX)	+= loongarch/xor_simd_glue.o
 
 
 CFLAGS_arm/xor-neon.o		+= $(CC_FLAGS_FPU)
_

Patches currently in -mm which might be from hch@lst.de are



                 reply	other threads:[~2026-04-03  6:41 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260403064152.87C68C4CEF7@smtp.kernel.org \
    --to=akpm@linux-foundation.org \
    --cc=agordeev@linux.ibm.com \
    --cc=alex@ghiti.fr \
    --cc=andreas@gaisler.com \
    --cc=anton.ivanov@cambridgegreys.com \
    --cc=aou@eecs.berkeley.edu \
    --cc=ardb@kernel.org \
    --cc=arnd@arndb.de \
    --cc=borntraeger@linux.ibm.com \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=chenhuacai@kernel.org \
    --cc=clm@fb.com \
    --cc=dan.j.williams@intel.com \
    --cc=davem@davemloft.net \
    --cc=dsterba@suse.com \
    --cc=ebiggers@kernel.org \
    --cc=gor@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=hch@lst.de \
    --cc=herbert@gondor.apana.org.au \
    --cc=hpa@zytor.com \
    --cc=jason@zx2c4.com \
    --cc=johannes@sipsolutions.net \
    --cc=kernel@xen0n.name \
    --cc=linan122@huawei.com \
    --cc=linmag7@gmail.com \
    --cc=linux@armlinux.org.uk \
    --cc=maddy@linux.ibm.com \
    --cc=mattst88@gmail.com \
    --cc=mingo@redhat.com \
    --cc=mm-commits@vger.kernel.org \
    --cc=mpe@ellerman.id.au \
    --cc=npiggin@gmail.com \
    --cc=palmer@dabbelt.com \
    --cc=richard.henderson@linaro.org \
    --cc=richard@nod.at \
    --cc=song@kernel.org \
    --cc=svens@linux.ibm.com \
    --cc=tytso@mit.edu \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox