From: Eric Biggers <ebiggers@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: linux-crypto@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
Zhihang Shao <zhihang.shao.iscas@gmail.com>,
linux-riscv@lists.infradead.org
Subject: [PATCH v4] riscv/crc-t10dif: Optimize crct10dif with zbc extension
Date: Wed, 12 Feb 2025 12:07:23 -0800 [thread overview]
Message-ID: <20250212200723.135894-1-ebiggers@kernel.org> (raw)
From: Zhihang Shao <zhihang.shao.iscas@gmail.com>
The current CRC-T10DIF algorithm on the RISC-V platform is based on
table-lookup optimization. Given the previous work on optimizing crc32
calculations with the zbc extension, it is believed that this will be
equally effective for accelerating crc-t10dif.
Therefore this patch adds an implementation of crc-t10dif using the zbc
extension. It detects whether the current runtime environment supports
the zbc feature and, if so, uses it to accelerate crc-t10dif calculations.
This patch has been updated to account for the rework of the kernel's
CRC-T10DIF library in 6.14, which was done by Eric Biggers. Also, I
used crc_kunit.c to test the performance of crc-t10dif optimized with the
zbc extension.
Signed-off-by: Zhihang Shao <zhihang.shao.iscas@gmail.com>
[EB: fixed 32-bit build, added comments that explain the algorithm used,
and various other cleanups]
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
This patch applies to
https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git/log/?h=crc-next
arch/riscv/Kconfig | 1 +
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/crc-t10dif-riscv.c | 131 ++++++++++++++++++++++++++++++
3 files changed, 133 insertions(+)
create mode 100644 arch/riscv/lib/crc-t10dif-riscv.c
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 7612c52e9b1e3..db1cf9666dfdd 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -23,10 +23,11 @@ config RISCV
select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_CRC32 if RISCV_ISA_ZBC
+ select ARCH_HAS_CRC_T10DIF if RISCV_ISA_ZBC
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_FAST_MULTIPLIER
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 79368a895feed..d1d1f3d880e32 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -14,8 +14,9 @@ lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
endif
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o
+obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
diff --git a/arch/riscv/lib/crc-t10dif-riscv.c b/arch/riscv/lib/crc-t10dif-riscv.c
new file mode 100644
index 0000000000000..2e9c3dcba8a0e
--- /dev/null
+++ b/arch/riscv/lib/crc-t10dif-riscv.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Accelerated CRC-T10DIF implementation with RISC-V Zbc extension.
+ *
+ * Copyright (C) 2024 Institute of Software, CAS.
+ */
+
+#include <asm/alternative-macros.h>
+#include <asm/byteorder.h>
+#include <asm/hwcap.h>
+
+#include <linux/crc-t10dif.h>
+#include <linux/module.h>
+
+/*
+ * CRC-T10DIF is a 16-bit CRC that uses most-significant-bit-first bit order,
+ * i.e. bit i contains the coefficient of x^i (not reflected).
+ */
+
+#define CRCT10DIF_POLY 0x18bb7 /* The generator polynomial G (degree 16, so 17 bits) */
+
+#if __riscv_xlen == 64
+#define CRCT10DIF_QUOTIENT_POLY 0xf65a57f81d33a48a /* floor(x^80 / G) - x^64, i.e. its low 64 bits */
+#define load_be_long(x) be64_to_cpup(x) /* read one big-endian 64-bit word */
+#elif __riscv_xlen == 32
+#define CRCT10DIF_QUOTIENT_POLY 0xf65a57f8 /* floor(x^48 / G) - x^32, i.e. its low 32 bits */
+#define load_be_long(x) be32_to_cpup(x) /* read one big-endian 32-bit word */
+#else
+#error "Unsupported __riscv_xlen"
+#endif
+
+/*
+ * Multiply the XLEN-bit message polynomial @m by x^16 and reduce it modulo the
+ * generator polynomial G using Zbc clmulh/clmul. Returns the 16-bit CRC of @m.
+ */
+static inline u16 crct10dif_zbc(unsigned long m)
+{
+ u16 crc; /* u16: truncation to 16 bits performs the final "mod x^16" */
+
+ asm volatile(".option push\n"
+ ".option arch,+zbc\n" /* allow Zbc mnemonics until the .option pop below */
+ /*
+ * First step of Barrett reduction with integrated
+ * multiplication by x^16:
+ *
+ * %0 := floor((m * floor(x^(XLEN+16) / G)) / x^XLEN)
+ *
+ * The resulting value is equal to floor((m * x^16) / G).
+ *
+ * The constant floor(x^(XLEN+16) / G) has degree x^XLEN,
+ * i.e. it has XLEN+1 bits. The clmulh instruction
+ * multiplies m by the x^0 through x^(XLEN-1) terms of this
+ * constant and does the floored division by x^XLEN. The
+ * xor instruction handles the x^XLEN term of the constant
+ * by adding an additional (m * x^XLEN) / x^XLEN = m.
+ */
+ "clmulh %0, %1, %2\n"
+ "xor %0, %0, %1\n"
+ /*
+ * Second step of Barrett reduction:
+ *
+ * crc := (m * x^16) + (G * floor((m * x^16) / G))
+ *
+ * This reduces (m * x^16) modulo G by adding the
+ * appropriate multiple of G to it. The result uses only
+ * the x^0 through x^15 terms. HOWEVER, since the
+ * unreduced value (m * x^16) is zero in those terms in the
+ * first place, it is more efficient to do the equivalent:
+ *
+ * crc := (G * floor((m * x^16) / G)) mod x^16
+ */
+ "clmul %0, %0, %3\n"
+ ".option pop\n"
+ : "=&r" (crc) /* %0; '&' because it is written before %1 and %3 are last read */
+ : "r" (m), /* %1: the message polynomial */
+ "r" (CRCT10DIF_QUOTIENT_POLY), /* %2: floor(x^(XLEN+16) / G) without its x^XLEN term */
+ "r" (CRCT10DIF_POLY)); /* %3: G */
+ return crc;
+}
+
+static inline u16 crct10dif_unaligned(u16 crc, const u8 *p, size_t len)
+{ /* Update @crc with the @len bytes at @p in one reduction; 8 * @len bits must fit in a long */
+ unsigned long m; /* message polynomial: @crc xored into the leading 16 bits of the bytes */
+ size_t i;
+
+ if (len == 1) /* 8-bit message: only crc's top byte meets p[0]; its low byte is shifted up */
+ return crct10dif_zbc(p[0] ^ (crc >> 8)) ^ (crc << 8);
+
+ /* len >= 2 assumed (crc_t10dif_arch never passes 0): xor crc into the first two bytes */
+ m = crc ^ (p[0] << 8) ^ p[1];
+ for (i = 2; i < len; i++) /* append the remaining bytes, earlier bytes in higher bits */
+ m = (m << 8) ^ p[i];
+ return crct10dif_zbc(m);
+}
+
+u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
+{ /* Update @crc with the @len bytes at @p and return it; uses Zbc if available, else generic code */
+ size_t align;
+ unsigned long m;
+
+ asm goto(ALTERNATIVE("j %l[fallback]", "nop", 0,
+ RISCV_ISA_EXT_ZBC, 1) : : : : fallback); /* "j fallback" by default; the RISCV_ISA_EXT_ZBC alternative is a nop */
+
+ align = -(unsigned long)p % sizeof(unsigned long); /* bytes until @p is long-aligned, 0 if it already is */
+ if (align && len) { /* unaligned head */
+ align = min(align, len); /* the buffer may end before the alignment boundary */
+ crc = crct10dif_unaligned(crc, p, align);
+ p += align;
+ len -= align;
+ }
+
+ while (len >= sizeof(unsigned long)) { /* aligned body: one XLEN-bit word per reduction */
+ m = ((unsigned long)crc << (8 * sizeof(unsigned long) - 16)) ^
+ load_be_long((const void *)p); /* crc is xored into the top 16 bits of the big-endian word */
+ crc = crct10dif_zbc(m);
+ p += sizeof(unsigned long);
+ len -= sizeof(unsigned long);
+ }
+
+ if (len) /* tail shorter than one long */
+ crc = crct10dif_unaligned(crc, p, len);
+
+ return crc;
+
+fallback: /* reached only while the jump above has not been replaced by the nop */
+ return crc_t10dif_generic(crc, p, len);
+}
+EXPORT_SYMBOL(crc_t10dif_arch);
+
+MODULE_DESCRIPTION("CRC-T10DIF using RISC-V ZBC Extension");
+MODULE_LICENSE("GPL");
base-commit: 4ffd50862d41e5aaf2e749efa354afaa1317c309
--
2.48.1
WARNING: multiple messages have this Message-ID (diff)
From: Eric Biggers <ebiggers@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: linux-crypto@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
Zhihang Shao <zhihang.shao.iscas@gmail.com>,
linux-riscv@lists.infradead.org
Subject: [PATCH v4] riscv/crc-t10dif: Optimize crct10dif with zbc extension
Date: Wed, 12 Feb 2025 12:07:23 -0800 [thread overview]
Message-ID: <20250212200723.135894-1-ebiggers@kernel.org> (raw)
From: Zhihang Shao <zhihang.shao.iscas@gmail.com>
The current CRC-T10DIF algorithm on the RISC-V platform is based on
table-lookup optimization. Given the previous work on optimizing crc32
calculations with the zbc extension, it is believed that this will be
equally effective for accelerating crc-t10dif.
Therefore this patch adds an implementation of crc-t10dif using the zbc
extension. It detects whether the current runtime environment supports
the zbc feature and, if so, uses it to accelerate crc-t10dif calculations.
This patch has been updated to account for the rework of the kernel's
CRC-T10DIF library in 6.14, which was done by Eric Biggers. Also, I
used crc_kunit.c to test the performance of crc-t10dif optimized with the
zbc extension.
Signed-off-by: Zhihang Shao <zhihang.shao.iscas@gmail.com>
[EB: fixed 32-bit build, added comments that explain the algorithm used,
and various other cleanups]
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
This patch applies to
https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git/log/?h=crc-next
arch/riscv/Kconfig | 1 +
arch/riscv/lib/Makefile | 1 +
arch/riscv/lib/crc-t10dif-riscv.c | 131 ++++++++++++++++++++++++++++++
3 files changed, 133 insertions(+)
create mode 100644 arch/riscv/lib/crc-t10dif-riscv.c
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 7612c52e9b1e3..db1cf9666dfdd 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -23,10 +23,11 @@ config RISCV
select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_CRC32 if RISCV_ISA_ZBC
+ select ARCH_HAS_CRC_T10DIF if RISCV_ISA_ZBC
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_FAST_MULTIPLIER
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 79368a895feed..d1d1f3d880e32 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -14,8 +14,9 @@ lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
endif
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o
+obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
diff --git a/arch/riscv/lib/crc-t10dif-riscv.c b/arch/riscv/lib/crc-t10dif-riscv.c
new file mode 100644
index 0000000000000..2e9c3dcba8a0e
--- /dev/null
+++ b/arch/riscv/lib/crc-t10dif-riscv.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Accelerated CRC-T10DIF implementation with RISC-V Zbc extension.
+ *
+ * Copyright (C) 2024 Institute of Software, CAS.
+ */
+
+#include <asm/alternative-macros.h>
+#include <asm/byteorder.h>
+#include <asm/hwcap.h>
+
+#include <linux/crc-t10dif.h>
+#include <linux/module.h>
+
+/*
+ * CRC-T10DIF is a 16-bit CRC that uses most-significant-bit-first bit order,
+ * i.e. bit i contains the coefficient of x^i (not reflected).
+ */
+
+#define CRCT10DIF_POLY 0x18bb7 /* The generator polynomial G (degree 16, so 17 bits) */
+
+#if __riscv_xlen == 64
+#define CRCT10DIF_QUOTIENT_POLY 0xf65a57f81d33a48a /* floor(x^80 / G) - x^64, i.e. its low 64 bits */
+#define load_be_long(x) be64_to_cpup(x) /* read one big-endian 64-bit word */
+#elif __riscv_xlen == 32
+#define CRCT10DIF_QUOTIENT_POLY 0xf65a57f8 /* floor(x^48 / G) - x^32, i.e. its low 32 bits */
+#define load_be_long(x) be32_to_cpup(x) /* read one big-endian 32-bit word */
+#else
+#error "Unsupported __riscv_xlen"
+#endif
+
+/*
+ * Multiply the XLEN-bit message polynomial @m by x^16 and reduce it modulo the
+ * generator polynomial G using Zbc clmulh/clmul. Returns the 16-bit CRC of @m.
+ */
+static inline u16 crct10dif_zbc(unsigned long m)
+{
+ u16 crc; /* u16: truncation to 16 bits performs the final "mod x^16" */
+
+ asm volatile(".option push\n"
+ ".option arch,+zbc\n" /* allow Zbc mnemonics until the .option pop below */
+ /*
+ * First step of Barrett reduction with integrated
+ * multiplication by x^16:
+ *
+ * %0 := floor((m * floor(x^(XLEN+16) / G)) / x^XLEN)
+ *
+ * The resulting value is equal to floor((m * x^16) / G).
+ *
+ * The constant floor(x^(XLEN+16) / G) has degree x^XLEN,
+ * i.e. it has XLEN+1 bits. The clmulh instruction
+ * multiplies m by the x^0 through x^(XLEN-1) terms of this
+ * constant and does the floored division by x^XLEN. The
+ * xor instruction handles the x^XLEN term of the constant
+ * by adding an additional (m * x^XLEN) / x^XLEN = m.
+ */
+ "clmulh %0, %1, %2\n"
+ "xor %0, %0, %1\n"
+ /*
+ * Second step of Barrett reduction:
+ *
+ * crc := (m * x^16) + (G * floor((m * x^16) / G))
+ *
+ * This reduces (m * x^16) modulo G by adding the
+ * appropriate multiple of G to it. The result uses only
+ * the x^0 through x^15 terms. HOWEVER, since the
+ * unreduced value (m * x^16) is zero in those terms in the
+ * first place, it is more efficient to do the equivalent:
+ *
+ * crc := (G * floor((m * x^16) / G)) mod x^16
+ */
+ "clmul %0, %0, %3\n"
+ ".option pop\n"
+ : "=&r" (crc) /* %0; '&' because it is written before %1 and %3 are last read */
+ : "r" (m), /* %1: the message polynomial */
+ "r" (CRCT10DIF_QUOTIENT_POLY), /* %2: floor(x^(XLEN+16) / G) without its x^XLEN term */
+ "r" (CRCT10DIF_POLY)); /* %3: G */
+ return crc;
+}
+
+static inline u16 crct10dif_unaligned(u16 crc, const u8 *p, size_t len)
+{ /* Update @crc with the @len bytes at @p in one reduction; 8 * @len bits must fit in a long */
+ unsigned long m; /* message polynomial: @crc xored into the leading 16 bits of the bytes */
+ size_t i;
+
+ if (len == 1) /* 8-bit message: only crc's top byte meets p[0]; its low byte is shifted up */
+ return crct10dif_zbc(p[0] ^ (crc >> 8)) ^ (crc << 8);
+
+ /* len >= 2 assumed (crc_t10dif_arch never passes 0): xor crc into the first two bytes */
+ m = crc ^ (p[0] << 8) ^ p[1];
+ for (i = 2; i < len; i++) /* append the remaining bytes, earlier bytes in higher bits */
+ m = (m << 8) ^ p[i];
+ return crct10dif_zbc(m);
+}
+
+u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
+{ /* Update @crc with the @len bytes at @p and return it; uses Zbc if available, else generic code */
+ size_t align;
+ unsigned long m;
+
+ asm goto(ALTERNATIVE("j %l[fallback]", "nop", 0,
+ RISCV_ISA_EXT_ZBC, 1) : : : : fallback); /* "j fallback" by default; the RISCV_ISA_EXT_ZBC alternative is a nop */
+
+ align = -(unsigned long)p % sizeof(unsigned long); /* bytes until @p is long-aligned, 0 if it already is */
+ if (align && len) { /* unaligned head */
+ align = min(align, len); /* the buffer may end before the alignment boundary */
+ crc = crct10dif_unaligned(crc, p, align);
+ p += align;
+ len -= align;
+ }
+
+ while (len >= sizeof(unsigned long)) { /* aligned body: one XLEN-bit word per reduction */
+ m = ((unsigned long)crc << (8 * sizeof(unsigned long) - 16)) ^
+ load_be_long((const void *)p); /* crc is xored into the top 16 bits of the big-endian word */
+ crc = crct10dif_zbc(m);
+ p += sizeof(unsigned long);
+ len -= sizeof(unsigned long);
+ }
+
+ if (len) /* tail shorter than one long */
+ crc = crct10dif_unaligned(crc, p, len);
+
+ return crc;
+
+fallback: /* reached only while the jump above has not been replaced by the nop */
+ return crc_t10dif_generic(crc, p, len);
+}
+EXPORT_SYMBOL(crc_t10dif_arch);
+
+MODULE_DESCRIPTION("CRC-T10DIF using RISC-V ZBC Extension");
+MODULE_LICENSE("GPL");
base-commit: 4ffd50862d41e5aaf2e749efa354afaa1317c309
--
2.48.1
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
next reply other threads:[~2025-02-12 20:07 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-12 20:07 Eric Biggers [this message]
2025-02-12 20:07 ` [PATCH v4] riscv/crc-t10dif: Optimize crct10dif with zbc extension Eric Biggers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250212200723.135894-1-ebiggers@kernel.org \
--to=ebiggers@kernel.org \
--cc=ardb@kernel.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=zhihang.shao.iscas@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.