* [PATCH v3 01/18] lib/crc32: drop leading underscores from __crc32c_le_base
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-03 22:31 ` [PATCH v3 02/18] lib/crc32: improve support for arch-specific overrides Eric Biggers
` (16 subsequent siblings)
17 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Remove the leading underscores from __crc32c_le_base().
This is in preparation for adding crc32c_le_arch() and eventually
renaming __crc32c_le() to crc32c_le().
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/arm64/lib/crc32-glue.c | 2 +-
arch/riscv/lib/crc32.c | 2 +-
crypto/crc32c_generic.c | 8 ++++----
include/linux/crc32.h | 2 +-
lib/crc32.c | 4 ++--
lib/crc32test.c | 2 +-
6 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c
index 295ae3e6b997..ad015223d15d 100644
--- a/arch/arm64/lib/crc32-glue.c
+++ b/arch/arm64/lib/crc32-glue.c
@@ -42,11 +42,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
}
u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
- return __crc32c_le_base(crc, p, len);
+ return crc32c_le_base(crc, p, len);
if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) {
kernel_neon_begin();
crc = crc32c_le_arm64_4way(crc, p, len);
kernel_neon_end();
diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c
index d7dc599af3ef..333fb7af1192 100644
--- a/arch/riscv/lib/crc32.c
+++ b/arch/riscv/lib/crc32.c
@@ -224,11 +224,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
}
u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
- CRC32C_POLY_QT_LE, __crc32c_le_base);
+ CRC32C_POLY_QT_LE, crc32c_le_base);
}
static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
size_t len)
{
diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c
index 7c2357c30fdf..635599b255ec 100644
--- a/crypto/crc32c_generic.c
+++ b/crypto/crc32c_generic.c
@@ -83,11 +83,11 @@ static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
static int chksum_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
- ctx->crc = __crc32c_le_base(ctx->crc, data, length);
+ ctx->crc = crc32c_le_base(ctx->crc, data, length);
return 0;
}
static int chksum_update_arch(struct shash_desc *desc, const u8 *data,
unsigned int length)
@@ -106,11 +106,11 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
return 0;
}
static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out)
{
- put_unaligned_le32(~__crc32c_le_base(*crcp, data, len), out);
+ put_unaligned_le32(~crc32c_le_base(*crcp, data, len), out);
return 0;
}
static int __chksum_finup_arch(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
@@ -198,16 +198,16 @@ static struct shash_alg algs[] = {{
}};
static int __init crc32c_mod_init(void)
{
/* register the arch flavor only if it differs from the generic one */
- return crypto_register_shashes(algs, 1 + (&__crc32c_le != &__crc32c_le_base));
+ return crypto_register_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base));
}
static void __exit crc32c_mod_fini(void)
{
- crypto_unregister_shashes(algs, 1 + (&__crc32c_le != &__crc32c_le_base));
+ crypto_unregister_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base));
}
subsys_initcall(crc32c_mod_init);
module_exit(crc32c_mod_fini);
diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 87f788c0d607..5b07fc9081c4 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -37,11 +37,11 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
{
return crc32_le_shift(crc1, len2) ^ crc2;
}
u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len);
-u32 __pure __crc32c_le_base(u32 crc, unsigned char const *p, size_t len);
+u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len);
/**
* __crc32c_le_combine - Combine two crc32c check values into one. For two
* sequences of bytes, seq1 and seq2 with lengths len1
* and len2, __crc32c_le() check values were calculated
diff --git a/lib/crc32.c b/lib/crc32.c
index ff587fee3893..c67059b0082b 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -205,12 +205,12 @@ EXPORT_SYMBOL(crc32_le);
EXPORT_SYMBOL(__crc32c_le);
u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
EXPORT_SYMBOL(crc32_le_base);
-u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
-EXPORT_SYMBOL(__crc32c_le_base);
+u32 __pure crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
+EXPORT_SYMBOL(crc32c_le_base);
u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be);
/*
* This multiplies the polynomials x and y modulo the given modulus.
diff --git a/lib/crc32test.c b/lib/crc32test.c
index 03cf5c1f2f5d..30b8da4d8be4 100644
--- a/lib/crc32test.c
+++ b/lib/crc32test.c
@@ -824,11 +824,11 @@ static void crc32test_regenerate(void)
for (i = 0; i < ARRAY_SIZE(test); i++) {
pr_info("{0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x},\n",
test[i].crc, test[i].start, test[i].length,
crc32_le_base(test[i].crc, test_buf + test[i].start, test[i].length),
crc32_be_base(test[i].crc, test_buf + test[i].start, test[i].length),
- __crc32c_le_base(test[i].crc, test_buf + test[i].start, test[i].length));
+ crc32c_le_base(test[i].crc, test_buf + test[i].start, test[i].length));
}
}
static int __init crc32test_init(void)
{
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH v3 02/18] lib/crc32: improve support for arch-specific overrides
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
2024-11-03 22:31 ` [PATCH v3 01/18] lib/crc32: drop leading underscores from __crc32c_le_base Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-03 22:31 ` [PATCH v3 03/18] lib/crc32: expose whether the lib is really optimized at runtime Eric Biggers
` (15 subsequent siblings)
17 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Currently the CRC32 library functions are defined as weak symbols, and
the arm64 and riscv architectures override them.
This method of arch-specific overrides has the limitation that it only
works when both the base and arch code is built-in. Also, it makes the
arch-specific code be silently not used if it is accidentally built with
lib-y instead of obj-y; unfortunately the RISC-V code does this.
This commit reorganizes the code to have explicit *_arch() functions
that are called when they are enabled, similar to how some of the crypto
library code works (e.g. chacha_crypt() calls chacha_crypt_arch()).
Make the existing kconfig choice for the CRC32 implementation also
control whether the arch-optimized implementation (if one is available)
is enabled or not. Make it enabled by default if CRC32 is also enabled.
The result is that arch-optimized CRC32 library functions will be
included automatically when appropriate, but it is now possible to
disable them. They can also now be built as a loadable module if the
CRC32 library functions happen to be used only by loadable modules, in
which case the arch and base CRC32 modules will be automatically loaded
via direct symbol dependency when appropriate.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/arm64/Kconfig | 1 +
arch/arm64/lib/Makefile | 3 +-
arch/arm64/lib/crc32-glue.c | 13 ++++-
arch/riscv/Kconfig | 1 +
arch/riscv/lib/Makefile | 3 +-
arch/riscv/lib/{crc32.c => crc32-riscv.c} | 13 ++++-
crypto/crc32_generic.c | 4 +-
crypto/crc32c_generic.c | 4 +-
include/linux/crc32.h | 35 +++++++++---
lib/Kconfig | 70 +++++++++++++++++------
lib/crc32.c | 22 +++----
11 files changed, 118 insertions(+), 51 deletions(-)
rename arch/riscv/lib/{crc32.c => crc32-riscv.c} (94%)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fd9df6dcc593..1e48f40f654e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -19,10 +19,11 @@ config ARM64
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_CACHE_LINE_SIZE
+ select ARCH_HAS_CRC32
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DMA_OPS if XEN
select ARCH_HAS_DMA_PREP_COHERENT
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 8e882f479d98..5fbcf0d56665 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -11,11 +11,12 @@ CFLAGS_xor-neon.o += $(CC_FLAGS_FPU)
CFLAGS_REMOVE_xor-neon.o += $(CC_FLAGS_NO_FPU)
endif
lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
-obj-$(CONFIG_CRC32) += crc32.o crc32-glue.o
+obj-$(CONFIG_CRC32_ARCH) += crc32-arm64.o
+crc32-arm64-y := crc32.o crc32-glue.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-$(CONFIG_ARM64_MTE) += mte.o
diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c
index ad015223d15d..d7f6e1cbf0d2 100644
--- a/arch/arm64/lib/crc32-glue.c
+++ b/arch/arm64/lib/crc32-glue.c
@@ -1,9 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/crc32.h>
#include <linux/linkage.h>
+#include <linux/module.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
@@ -19,11 +20,11 @@ asmlinkage u32 crc32_be_arm64(u32 crc, unsigned char const *p, size_t len);
asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
return crc32_le_base(crc, p, len);
if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) {
@@ -38,12 +39,13 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
return crc;
}
return crc32_le_arm64(crc, p, len);
}
+EXPORT_SYMBOL(crc32_le_arch);
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
{
if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
return crc32c_le_base(crc, p, len);
if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) {
@@ -58,12 +60,13 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
return crc;
}
return crc32c_le_arm64(crc, p, len);
}
+EXPORT_SYMBOL(crc32c_le_arch);
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
return crc32_be_base(crc, p, len);
if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) {
@@ -78,5 +81,9 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
return crc;
}
return crc32_be_arm64(crc, p, len);
}
+EXPORT_SYMBOL(crc32_be_arch);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("arm64-optimized CRC32 functions");
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index f4c570538d55..046f0cb119da 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -22,10 +22,11 @@ config RISCV
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM_VMEMMAP
select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_BINFMT_FLAT
+ select ARCH_HAS_CRC32 if RISCV_ISA_ZBC
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_FAST_MULTIPLIER
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 8eec6b69a875..79368a895fee 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -13,10 +13,9 @@ ifeq ($(CONFIG_MMU), y)
lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o
endif
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
-lib-$(CONFIG_RISCV_ISA_ZBC) += crc32.o
-
+obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32-riscv.c
similarity index 94%
rename from arch/riscv/lib/crc32.c
rename to arch/riscv/lib/crc32-riscv.c
index 333fb7af1192..a3ff7db2a1ce 100644
--- a/arch/riscv/lib/crc32.c
+++ b/arch/riscv/lib/crc32-riscv.c
@@ -12,10 +12,11 @@
#include <linux/types.h>
#include <linux/minmax.h>
#include <linux/crc32poly.h>
#include <linux/crc32.h>
#include <linux/byteorder/generic.h>
+#include <linux/module.h>
/*
* Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
* better understanding of how this math works.
*
@@ -215,21 +216,23 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
legacy:
return crc_fb(crc, p, len);
}
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
crc32_le_base);
}
+EXPORT_SYMBOL(crc32_le_arch);
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
CRC32C_POLY_QT_LE, crc32c_le_base);
}
+EXPORT_SYMBOL(crc32c_le_arch);
static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
size_t len)
{
size_t bits = len * 8;
@@ -251,11 +254,11 @@ static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
crc ^= crc_low;
return crc;
}
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
size_t offset, head_len, tail_len;
unsigned long const *p_ul;
unsigned long s;
@@ -290,5 +293,9 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
return crc;
legacy:
return crc32_be_base(crc, p, len);
}
+EXPORT_SYMBOL(crc32_be_arch);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");
diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c
index 6a55d206fab3..cc064ea8240e 100644
--- a/crypto/crc32_generic.c
+++ b/crypto/crc32_generic.c
@@ -158,16 +158,16 @@ static struct shash_alg algs[] = {{
}};
static int __init crc32_mod_init(void)
{
/* register the arch flavor only if it differs from the generic one */
- return crypto_register_shashes(algs, 1 + (&crc32_le != &crc32_le_base));
+ return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
}
static void __exit crc32_mod_fini(void)
{
- crypto_unregister_shashes(algs, 1 + (&crc32_le != &crc32_le_base));
+ crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
}
subsys_initcall(crc32_mod_init);
module_exit(crc32_mod_fini);
diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c
index 635599b255ec..04b03d825cf4 100644
--- a/crypto/crc32c_generic.c
+++ b/crypto/crc32c_generic.c
@@ -198,16 +198,16 @@ static struct shash_alg algs[] = {{
}};
static int __init crc32c_mod_init(void)
{
/* register the arch flavor only if it differs from the generic one */
- return crypto_register_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base));
+ return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
}
static void __exit crc32c_mod_fini(void)
{
- crypto_unregister_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base));
+ crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
}
subsys_initcall(crc32c_mod_init);
module_exit(crc32c_mod_fini);
diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 5b07fc9081c4..58c632533b08 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -6,14 +6,38 @@
#define _LINUX_CRC32_H
#include <linux/types.h>
#include <linux/bitrev.h>
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len);
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len);
+
+static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len)
+{
+ if (IS_ENABLED(CONFIG_CRC32_ARCH))
+ return crc32_le_arch(crc, p, len);
+ return crc32_le_base(crc, p, len);
+}
+
+static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len)
+{
+ if (IS_ENABLED(CONFIG_CRC32_ARCH))
+ return crc32_be_arch(crc, p, len);
+ return crc32_be_base(crc, p, len);
+}
+
+/* TODO: leading underscores should be dropped once callers have been updated */
+static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
+{
+ if (IS_ENABLED(CONFIG_CRC32_ARCH))
+ return crc32c_le_arch(crc, p, len);
+ return crc32c_le_base(crc, p, len);
+}
/**
* crc32_le_combine - Combine two crc32 check values into one. For two
* sequences of bytes, seq1 and seq2 with lengths len1
* and len2, crc32_le() check values were calculated
@@ -36,13 +60,10 @@ u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len);
static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
{
return crc32_le_shift(crc1, len2) ^ crc2;
}
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len);
-
/**
* __crc32c_le_combine - Combine two crc32c check values into one. For two
* sequences of bytes, seq1 and seq2 with lengths len1
* and len2, __crc32c_le() check values were calculated
* for each, crc1 and crc2.
diff --git a/lib/Kconfig b/lib/Kconfig
index b38849af6f13..07afcf214f35 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -176,10 +176,13 @@ config CRC32
This option is provided for the case where no in-kernel-tree
modules require CRC32/CRC32c functions, but a module built outside
the kernel tree does. Such modules that use library CRC32/CRC32c
functions require M here.
+config ARCH_HAS_CRC32
+ bool
+
config CRC32_SELFTEST
tristate "CRC32 perform self test on init"
depends on CRC32
help
This option enables the CRC32 library functions to perform a
@@ -188,54 +191,89 @@ config CRC32_SELFTEST
and computes the total elapsed time and number of bytes processed.
choice
prompt "CRC32 implementation"
depends on CRC32
- default CRC32_SLICEBY8
+ default CRC32_IMPL_ARCH_PLUS_SLICEBY8 if ARCH_HAS_CRC32
+ default CRC32_IMPL_SLICEBY8 if !ARCH_HAS_CRC32
help
- This option allows a kernel builder to override the default choice
- of CRC32 algorithm. Choose the default ("slice by 8") unless you
- know that you need one of the others.
+ This option allows you to override the default choice of CRC32
+ implementation. Choose the default unless you know that you need one
+ of the others.
-config CRC32_SLICEBY8
+config CRC32_IMPL_ARCH_PLUS_SLICEBY8
+ bool "Arch-optimized, with fallback to slice-by-8" if ARCH_HAS_CRC32
+ help
+ Use architecture-optimized implementation of CRC32. Fall back to
+ slice-by-8 in cases where the arch-optimized implementation cannot be
+ used, e.g. if the CPU lacks support for the needed instructions.
+
+ This is the default when an arch-optimized implementation exists.
+
+config CRC32_IMPL_ARCH_PLUS_SLICEBY1
+ bool "Arch-optimized, with fallback to slice-by-1" if ARCH_HAS_CRC32
+ help
+ Use architecture-optimized implementation of CRC32, but fall back to
+ slice-by-1 instead of slice-by-8 in order to reduce the binary size.
+
+config CRC32_IMPL_SLICEBY8
bool "Slice by 8 bytes"
help
Calculate checksum 8 bytes at a time with a clever slicing algorithm.
- This is the fastest algorithm, but comes with a 8KiB lookup table.
- Most modern processors have enough cache to hold this table without
- thrashing the cache.
-
- This is the default implementation choice. Choose this one unless
- you have a good reason not to.
+ This is much slower than the architecture-optimized implementation of
+ CRC32 (if the selected arch has one), but it is portable and is the
+ fastest implementation when no arch-optimized implementation is
+ available. It uses an 8KiB lookup table. Most modern processors have
+ enough cache to hold this table without thrashing the cache.
-config CRC32_SLICEBY4
+config CRC32_IMPL_SLICEBY4
bool "Slice by 4 bytes"
help
Calculate checksum 4 bytes at a time with a clever slicing algorithm.
This is a bit slower than slice by 8, but has a smaller 4KiB lookup
table.
Only choose this option if you know what you are doing.
-config CRC32_SARWATE
- bool "Sarwate's Algorithm (one byte at a time)"
+config CRC32_IMPL_SLICEBY1
+ bool "Slice by 1 byte (Sarwate's algorithm)"
help
Calculate checksum a byte at a time using Sarwate's algorithm. This
- is not particularly fast, but has a small 256 byte lookup table.
+ is not particularly fast, but has a small 1KiB lookup table.
Only choose this option if you know what you are doing.
-config CRC32_BIT
+config CRC32_IMPL_BIT
bool "Classic Algorithm (one bit at a time)"
help
Calculate checksum one bit at a time. This is VERY slow, but has
no lookup table. This is provided as a debugging option.
Only choose this option if you are debugging crc32.
endchoice
+config CRC32_ARCH
+ tristate
+ default CRC32 if CRC32_IMPL_ARCH_PLUS_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY1
+
+config CRC32_SLICEBY8
+ bool
+ default y if CRC32_IMPL_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY8
+
+config CRC32_SLICEBY4
+ bool
+ default y if CRC32_IMPL_SLICEBY4
+
+config CRC32_SARWATE
+ bool
+ default y if CRC32_IMPL_SLICEBY1 || CRC32_IMPL_ARCH_PLUS_SLICEBY1
+
+config CRC32_BIT
+ bool
+ default y if CRC32_IMPL_BIT
+
config CRC64
tristate "CRC64 functions"
help
This option is provided for the case where no in-kernel-tree
modules require CRC64 functions, but a module built outside
diff --git a/lib/crc32.c b/lib/crc32.c
index c67059b0082b..47151624332e 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -181,39 +181,31 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
#endif
return crc;
}
#if CRC_LE_BITS == 1
-u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE);
}
-u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
}
#else
-u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE);
}
-u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE);
}
#endif
-EXPORT_SYMBOL(crc32_le);
-EXPORT_SYMBOL(__crc32c_le);
-
-u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
EXPORT_SYMBOL(crc32_le_base);
-
-u32 __pure crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
EXPORT_SYMBOL(crc32c_le_base);
-u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be);
-
/*
* This multiplies the polynomials x and y modulo the given modulus.
* This follows the "little-endian" CRC convention that the lsbit
* represents the highest power of x, and the msbit represents x^0.
*/
@@ -333,16 +325,16 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
# endif
return crc;
}
#if CRC_BE_BITS == 1
-u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
{
return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
}
#else
-u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
{
return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
}
#endif
-EXPORT_SYMBOL(crc32_be);
+EXPORT_SYMBOL(crc32_be_base);
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH v3 03/18] lib/crc32: expose whether the lib is really optimized at runtime
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
2024-11-03 22:31 ` [PATCH v3 01/18] lib/crc32: drop leading underscores from __crc32c_le_base Eric Biggers
2024-11-03 22:31 ` [PATCH v3 02/18] lib/crc32: improve support for arch-specific overrides Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-04 10:55 ` Ard Biesheuvel
2024-11-03 22:31 ` [PATCH v3 04/18] crypto: crc32 - don't unnecessarily register arch algorithms Eric Biggers
` (14 subsequent siblings)
17 siblings, 1 reply; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86
From: Eric Biggers <ebiggers@google.com>
Make the CRC32 library export a function crc32_optimizations() which
returns flags that indicate which CRC32 functions are actually executing
optimized code at runtime.
This will be used to determine whether the crc32[c]-$arch shash
algorithms should be registered in the crypto API. btrfs could also
start using these flags instead of the hack that it currently uses where
it parses the crypto_shash_driver_name.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/arm64/lib/crc32-glue.c | 10 ++++++++++
arch/riscv/lib/crc32-riscv.c | 10 ++++++++++
include/linux/crc32.h | 15 +++++++++++++++
3 files changed, 35 insertions(+)
diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c
index d7f6e1cbf0d2..15c4c9db573e 100644
--- a/arch/arm64/lib/crc32-glue.c
+++ b/arch/arm64/lib/crc32-glue.c
@@ -83,7 +83,17 @@ u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
return crc32_be_arm64(crc, p, len);
}
EXPORT_SYMBOL(crc32_be_arch);
+u32 crc32_optimizations(void)
+{
+ if (alternative_has_cap_likely(ARM64_HAS_CRC32))
+ return CRC32_LE_OPTIMIZATION |
+ CRC32_BE_OPTIMIZATION |
+ CRC32C_OPTIMIZATION;
+ return 0;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("arm64-optimized CRC32 functions");
diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c
index a3ff7db2a1ce..53d56ab422c7 100644
--- a/arch/riscv/lib/crc32-riscv.c
+++ b/arch/riscv/lib/crc32-riscv.c
@@ -295,7 +295,17 @@ u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
legacy:
return crc32_be_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_be_arch);
+u32 crc32_optimizations(void)
+{
+ if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
+ return CRC32_LE_OPTIMIZATION |
+ CRC32_BE_OPTIMIZATION |
+ CRC32C_OPTIMIZATION;
+ return 0;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");
diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 58c632533b08..e9bd40056687 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
if (IS_ENABLED(CONFIG_CRC32_ARCH))
return crc32c_le_arch(crc, p, len);
return crc32c_le_base(crc, p, len);
}
+/*
+ * crc32_optimizations() returns flags that indicate which CRC32 library
+ * functions are using architecture-specific optimizations. Unlike
+ * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32
+ * variants and also whether any needed CPU features are available at runtime.
+ */
+#define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */
+#define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */
+#define CRC32C_OPTIMIZATION BIT(2) /* __crc32c_le() is optimized */
+#if IS_ENABLED(CONFIG_CRC32_ARCH)
+u32 crc32_optimizations(void);
+#else
+static inline u32 crc32_optimizations(void) { return 0; }
+#endif
+
/**
* crc32_le_combine - Combine two crc32 check values into one. For two
* sequences of bytes, seq1 and seq2 with lengths len1
* and len2, crc32_le() check values were calculated
* for each, crc1 and crc2.
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH v3 03/18] lib/crc32: expose whether the lib is really optimized at runtime
2024-11-03 22:31 ` [PATCH v3 03/18] lib/crc32: expose whether the lib is really optimized at runtime Eric Biggers
@ 2024-11-04 10:55 ` Ard Biesheuvel
0 siblings, 0 replies; 31+ messages in thread
From: Ard Biesheuvel @ 2024-11-04 10:55 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-kernel, linux-arch, linux-arm-kernel, linux-crypto,
linux-ext4, linux-f2fs-devel, linux-mips, linux-riscv, linux-s390,
linux-scsi, linuxppc-dev, loongarch, sparclinux, x86
On Sun, 3 Nov 2024 at 23:34, Eric Biggers <ebiggers@kernel.org> wrote:
>
> From: Eric Biggers <ebiggers@google.com>
>
> Make the CRC32 library export a function crc32_optimizations() which
> returns flags that indicate which CRC32 functions are actually executing
> optimized code at runtime.
>
> This will be used to determine whether the crc32[c]-$arch shash
> algorithms should be registered in the crypto API. btrfs could also
> start using these flags instead of the hack that it currently uses where
> it parses the crypto_shash_driver_name.
>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> arch/arm64/lib/crc32-glue.c | 10 ++++++++++
> arch/riscv/lib/crc32-riscv.c | 10 ++++++++++
> include/linux/crc32.h | 15 +++++++++++++++
> 3 files changed, 35 insertions(+)
>
> diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c
> index d7f6e1cbf0d2..15c4c9db573e 100644
> --- a/arch/arm64/lib/crc32-glue.c
> +++ b/arch/arm64/lib/crc32-glue.c
> @@ -83,7 +83,17 @@ u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
>
> return crc32_be_arm64(crc, p, len);
> }
> EXPORT_SYMBOL(crc32_be_arch);
>
> +u32 crc32_optimizations(void)
> +{
> + if (alternative_has_cap_likely(ARM64_HAS_CRC32))
> + return CRC32_LE_OPTIMIZATION |
> + CRC32_BE_OPTIMIZATION |
> + CRC32C_OPTIMIZATION;
> + return 0;
> +}
> +EXPORT_SYMBOL(crc32_optimizations);
> +
> MODULE_LICENSE("GPL");
> MODULE_DESCRIPTION("arm64-optimized CRC32 functions");
> diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c
> index a3ff7db2a1ce..53d56ab422c7 100644
> --- a/arch/riscv/lib/crc32-riscv.c
> +++ b/arch/riscv/lib/crc32-riscv.c
> @@ -295,7 +295,17 @@ u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
> legacy:
> return crc32_be_base(crc, p, len);
> }
> EXPORT_SYMBOL(crc32_be_arch);
>
> +u32 crc32_optimizations(void)
> +{
> + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
> + return CRC32_LE_OPTIMIZATION |
> + CRC32_BE_OPTIMIZATION |
> + CRC32C_OPTIMIZATION;
> + return 0;
> +}
> +EXPORT_SYMBOL(crc32_optimizations);
> +
> MODULE_LICENSE("GPL");
> MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");
> diff --git a/include/linux/crc32.h b/include/linux/crc32.h
> index 58c632533b08..e9bd40056687 100644
> --- a/include/linux/crc32.h
> +++ b/include/linux/crc32.h
> @@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
> if (IS_ENABLED(CONFIG_CRC32_ARCH))
> return crc32c_le_arch(crc, p, len);
> return crc32c_le_base(crc, p, len);
> }
>
> +/*
> + * crc32_optimizations() returns flags that indicate which CRC32 library
> + * functions are using architecture-specific optimizations. Unlike
> + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32
> + * variants and also whether any needed CPU features are available at runtime.
> + */
> +#define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */
> +#define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */
> +#define CRC32C_OPTIMIZATION BIT(2) /* __crc32c_le() is optimized */
> +#if IS_ENABLED(CONFIG_CRC32_ARCH)
> +u32 crc32_optimizations(void);
> +#else
> +static inline u32 crc32_optimizations(void) { return 0; }
> +#endif
> +
> /**
> * crc32_le_combine - Combine two crc32 check values into one. For two
> * sequences of bytes, seq1 and seq2 with lengths len1
> * and len2, crc32_le() check values were calculated
> * for each, crc1 and crc2.
> --
> 2.47.0
>
>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v3 04/18] crypto: crc32 - don't unnecessarily register arch algorithms
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (2 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 03/18] lib/crc32: expose whether the lib is really optimized at runtime Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-04 10:56 ` Ard Biesheuvel
2024-11-03 22:31 ` [PATCH v3 05/18] arm/crc32: expose CRC32 functions through lib Eric Biggers
` (13 subsequent siblings)
17 siblings, 1 reply; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86
From: Eric Biggers <ebiggers@google.com>
Instead of registering the crc32-$arch and crc32c-$arch algorithms if
the arch-specific code was built, only register them when that code was
built *and* is not falling back to the base implementation at runtime.
This avoids confusing users like btrfs which checks the shash driver
name to determine whether it is crc32c-generic.
(It would also make sense to change btrfs to test the crc32_optimization
flags itself, so that it doesn't have to use the weird hack of parsing
the driver name. This change still makes sense either way though.)
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
crypto/crc32_generic.c | 8 ++++++--
crypto/crc32c_generic.c | 8 ++++++--
2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c
index cc064ea8240e..783a30b27398 100644
--- a/crypto/crc32_generic.c
+++ b/crypto/crc32_generic.c
@@ -155,19 +155,23 @@ static struct shash_alg algs[] = {{
.base.cra_ctxsize = sizeof(u32),
.base.cra_module = THIS_MODULE,
.base.cra_init = crc32_cra_init,
}};
+static int num_algs;
+
static int __init crc32_mod_init(void)
{
/* register the arch flavor only if it differs from the generic one */
- return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
+ num_algs = 1 + ((crc32_optimizations() & CRC32_LE_OPTIMIZATION) != 0);
+
+ return crypto_register_shashes(algs, num_algs);
}
static void __exit crc32_mod_fini(void)
{
- crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
+ crypto_unregister_shashes(algs, num_algs);
}
subsys_initcall(crc32_mod_init);
module_exit(crc32_mod_fini);
diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c
index 04b03d825cf4..985da981d6e2 100644
--- a/crypto/crc32c_generic.c
+++ b/crypto/crc32c_generic.c
@@ -195,19 +195,23 @@ static struct shash_alg algs[] = {{
.base.cra_ctxsize = sizeof(struct chksum_ctx),
.base.cra_module = THIS_MODULE,
.base.cra_init = crc32c_cra_init,
}};
+static int num_algs;
+
static int __init crc32c_mod_init(void)
{
/* register the arch flavor only if it differs from the generic one */
- return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
+ num_algs = 1 + ((crc32_optimizations() & CRC32C_OPTIMIZATION) != 0);
+
+ return crypto_register_shashes(algs, num_algs);
}
static void __exit crc32c_mod_fini(void)
{
- crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
+ crypto_unregister_shashes(algs, num_algs);
}
subsys_initcall(crc32c_mod_init);
module_exit(crc32c_mod_fini);
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH v3 04/18] crypto: crc32 - don't unnecessarily register arch algorithms
2024-11-03 22:31 ` [PATCH v3 04/18] crypto: crc32 - don't unnecessarily register arch algorithms Eric Biggers
@ 2024-11-04 10:56 ` Ard Biesheuvel
0 siblings, 0 replies; 31+ messages in thread
From: Ard Biesheuvel @ 2024-11-04 10:56 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-kernel, linux-arch, linux-arm-kernel, linux-crypto,
linux-ext4, linux-f2fs-devel, linux-mips, linux-riscv, linux-s390,
linux-scsi, linuxppc-dev, loongarch, sparclinux, x86
On Sun, 3 Nov 2024 at 23:32, Eric Biggers <ebiggers@kernel.org> wrote:
>
> From: Eric Biggers <ebiggers@google.com>
>
> Instead of registering the crc32-$arch and crc32c-$arch algorithms if
> the arch-specific code was built, only register them when that code was
> built *and* is not falling back to the base implementation at runtime.
>
> This avoids confusing users like btrfs which checks the shash driver
> name to determine whether it is crc32c-generic.
>
> (It would also make sense to change btrfs to test the crc32_optimization
> flags itself, so that it doesn't have to use the weird hack of parsing
> the driver name. This change still makes sense either way though.)
>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> crypto/crc32_generic.c | 8 ++++++--
> crypto/crc32c_generic.c | 8 ++++++--
> 2 files changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c
> index cc064ea8240e..783a30b27398 100644
> --- a/crypto/crc32_generic.c
> +++ b/crypto/crc32_generic.c
> @@ -155,19 +155,23 @@ static struct shash_alg algs[] = {{
> .base.cra_ctxsize = sizeof(u32),
> .base.cra_module = THIS_MODULE,
> .base.cra_init = crc32_cra_init,
> }};
>
> +static int num_algs;
> +
> static int __init crc32_mod_init(void)
> {
> /* register the arch flavor only if it differs from the generic one */
> - return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
> + num_algs = 1 + ((crc32_optimizations() & CRC32_LE_OPTIMIZATION) != 0);
> +
> + return crypto_register_shashes(algs, num_algs);
> }
>
> static void __exit crc32_mod_fini(void)
> {
> - crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
> + crypto_unregister_shashes(algs, num_algs);
> }
>
> subsys_initcall(crc32_mod_init);
> module_exit(crc32_mod_fini);
>
> diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c
> index 04b03d825cf4..985da981d6e2 100644
> --- a/crypto/crc32c_generic.c
> +++ b/crypto/crc32c_generic.c
> @@ -195,19 +195,23 @@ static struct shash_alg algs[] = {{
> .base.cra_ctxsize = sizeof(struct chksum_ctx),
> .base.cra_module = THIS_MODULE,
> .base.cra_init = crc32c_cra_init,
> }};
>
> +static int num_algs;
> +
> static int __init crc32c_mod_init(void)
> {
> /* register the arch flavor only if it differs from the generic one */
> - return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
> + num_algs = 1 + ((crc32_optimizations() & CRC32C_OPTIMIZATION) != 0);
> +
> + return crypto_register_shashes(algs, num_algs);
> }
>
> static void __exit crc32c_mod_fini(void)
> {
> - crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
> + crypto_unregister_shashes(algs, num_algs);
> }
>
> subsys_initcall(crc32c_mod_init);
> module_exit(crc32c_mod_fini);
>
> --
> 2.47.0
>
>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v3 05/18] arm/crc32: expose CRC32 functions through lib
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (3 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 04/18] crypto: crc32 - don't unnecessarily register arch algorithms Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-04 18:10 ` Eric Biggers
2024-11-03 22:31 ` [PATCH v3 06/18] loongarch/crc32: " Eric Biggers
` (12 subsequent siblings)
17 siblings, 1 reply; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Move the arm CRC32 assembly code into the lib directory and wire it up
to the library interface. This allows it to be used without going
through the crypto API. It remains usable via the crypto API too via
the shash algorithms that use the library interface. Thus all the
arch-specific "shash" code becomes unnecessary and is removed.
Note: to see the diff from arch/arm/crypto/crc32-ce-glue.c to
arch/arm/lib/crc32-glue.c, view this commit with 'git show -M10'.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/arm/Kconfig | 1 +
arch/arm/configs/milbeaut_m10v_defconfig | 1 -
arch/arm/configs/multi_v7_defconfig | 1 -
arch/arm/crypto/Kconfig | 14 -
arch/arm/crypto/Makefile | 2 -
arch/arm/crypto/crc32-ce-glue.c | 247 ------------------
arch/arm/lib/Makefile | 3 +
.../crc32-ce-core.S => lib/crc32-core.S} | 0
arch/arm/lib/crc32-glue.c | 123 +++++++++
9 files changed, 127 insertions(+), 265 deletions(-)
delete mode 100644 arch/arm/crypto/crc32-ce-glue.c
rename arch/arm/{crypto/crc32-ce-core.S => lib/crc32-core.S} (100%)
create mode 100644 arch/arm/lib/crc32-glue.c
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 749179a1d162..851260303234 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -5,10 +5,11 @@ config ARM
select ARCH_32BIT_OFF_T
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_POINTER && !ARM_UNWIND
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_CPU_CACHE_ALIASING
select ARCH_HAS_CPU_FINALIZE_INIT if MMU
+ select ARCH_HAS_CRC32 if KERNEL_MODE_NEON
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DMA_ALLOC if MMU
select ARCH_HAS_DMA_OPS
select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig
index f5eeac9c65c3..acd16204f8d7 100644
--- a/arch/arm/configs/milbeaut_m10v_defconfig
+++ b/arch/arm/configs/milbeaut_m10v_defconfig
@@ -105,11 +105,10 @@ CONFIG_CRYPTO_SHA2_ARM_CE=m
CONFIG_CRYPTO_SHA512_ARM=m
CONFIG_CRYPTO_AES_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m
CONFIG_CRYPTO_AES_ARM_CE=m
CONFIG_CRYPTO_CHACHA20_NEON=m
-CONFIG_CRYPTO_CRC32_ARM_CE=m
# CONFIG_CRYPTO_HW is not set
CONFIG_CRC_CCITT=m
CONFIG_CRC_ITU_T=m
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=64
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 9a5f5c439b87..287ca055965f 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1304,11 +1304,10 @@ CONFIG_CRYPTO_SHA2_ARM_CE=m
CONFIG_CRYPTO_SHA512_ARM=m
CONFIG_CRYPTO_AES_ARM=m
CONFIG_CRYPTO_AES_ARM_BS=m
CONFIG_CRYPTO_AES_ARM_CE=m
CONFIG_CRYPTO_CHACHA20_NEON=m
-CONFIG_CRYPTO_CRC32_ARM_CE=m
CONFIG_CRYPTO_DEV_SUN4I_SS=m
CONFIG_CRYPTO_DEV_FSL_CAAM=m
CONFIG_CRYPTO_DEV_EXYNOS_RNG=m
CONFIG_CRYPTO_DEV_S5P=m
CONFIG_CRYPTO_DEV_ATMEL_AES=m
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 5ff49a5e9afc..ea0ebf336d0d 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -220,24 +220,10 @@ config CRYPTO_CHACHA20_NEON
stream cipher algorithms
Architecture: arm using:
- NEON (Advanced SIMD) extensions
-config CRYPTO_CRC32_ARM_CE
- tristate "CRC32C and CRC32"
- depends on KERNEL_MODE_NEON
- depends on CRC32
- select CRYPTO_HASH
- help
- CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
- and CRC32 CRC algorithm (IEEE 802.3)
-
- Architecture: arm using:
- - CRC and/or PMULL instructions
-
- Drivers: crc32-arm-ce and crc32c-arm-ce
-
config CRYPTO_CRCT10DIF_ARM_CE
tristate "CRCT10DIF"
depends on KERNEL_MODE_NEON
depends on CRC_T10DIF
select CRYPTO_HASH
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 13e62c7c25dc..38ec5cc1e844 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -19,11 +19,10 @@ obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o
obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
@@ -36,11 +35,10 @@ blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o
sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
-crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
chacha-neon-y := chacha-scalar-core.o chacha-glue.o
chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
poly1305-arm-y := poly1305-core.o poly1305-glue.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
curve25519-neon-y := curve25519-core.o curve25519-glue.o
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
deleted file mode 100644
index 20b4dff13e3a..000000000000
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ /dev/null
@@ -1,247 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
- *
- * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-#include <linux/unaligned.h>
-
-#define PMULL_MIN_LEN 64L /* minimum size of buffer
- * for crc32_pmull_le_16 */
-#define SCALE_F 16L /* size of NEON register */
-
-asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc);
-asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len);
-
-asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc);
-asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len);
-
-static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], u32 len);
-static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], u32 len);
-
-static int crc32_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = 0;
- return 0;
-}
-
-static int crc32c_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = ~0;
- return 0;
-}
-
-static int crc32_setkey(struct crypto_shash *hash, const u8 *key,
- unsigned int keylen)
-{
- u32 *mctx = crypto_shash_ctx(hash);
-
- if (keylen != sizeof(u32))
- return -EINVAL;
- *mctx = le32_to_cpup((__le32 *)key);
- return 0;
-}
-
-static int crc32_init(struct shash_desc *desc)
-{
- u32 *mctx = crypto_shash_ctx(desc->tfm);
- u32 *crc = shash_desc_ctx(desc);
-
- *crc = *mctx;
- return 0;
-}
-
-static int crc32_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- *crc = crc32_armv8_le(*crc, data, length);
- return 0;
-}
-
-static int crc32c_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- *crc = crc32c_armv8_le(*crc, data, length);
- return 0;
-}
-
-static int crc32_final(struct shash_desc *desc, u8 *out)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- put_unaligned_le32(*crc, out);
- return 0;
-}
-
-static int crc32c_final(struct shash_desc *desc, u8 *out)
-{
- u32 *crc = shash_desc_ctx(desc);
-
- put_unaligned_le32(~*crc, out);
- return 0;
-}
-
-static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
- unsigned int l;
-
- if (crypto_simd_usable()) {
- if ((u32)data % SCALE_F) {
- l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
-
- *crc = fallback_crc32(*crc, data, l);
-
- data += l;
- length -= l;
- }
-
- if (length >= PMULL_MIN_LEN) {
- l = round_down(length, SCALE_F);
-
- kernel_neon_begin();
- *crc = crc32_pmull_le(data, l, *crc);
- kernel_neon_end();
-
- data += l;
- length -= l;
- }
- }
-
- if (length > 0)
- *crc = fallback_crc32(*crc, data, length);
-
- return 0;
-}
-
-static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- u32 *crc = shash_desc_ctx(desc);
- unsigned int l;
-
- if (crypto_simd_usable()) {
- if ((u32)data % SCALE_F) {
- l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
-
- *crc = fallback_crc32c(*crc, data, l);
-
- data += l;
- length -= l;
- }
-
- if (length >= PMULL_MIN_LEN) {
- l = round_down(length, SCALE_F);
-
- kernel_neon_begin();
- *crc = crc32c_pmull_le(data, l, *crc);
- kernel_neon_end();
-
- data += l;
- length -= l;
- }
- }
-
- if (length > 0)
- *crc = fallback_crc32c(*crc, data, length);
-
- return 0;
-}
-
-static struct shash_alg crc32_pmull_algs[] = { {
- .setkey = crc32_setkey,
- .init = crc32_init,
- .update = crc32_update,
- .final = crc32_final,
- .descsize = sizeof(u32),
- .digestsize = sizeof(u32),
-
- .base.cra_ctxsize = sizeof(u32),
- .base.cra_init = crc32_cra_init,
- .base.cra_name = "crc32",
- .base.cra_driver_name = "crc32-arm-ce",
- .base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .base.cra_blocksize = 1,
- .base.cra_module = THIS_MODULE,
-}, {
- .setkey = crc32_setkey,
- .init = crc32_init,
- .update = crc32c_update,
- .final = crc32c_final,
- .descsize = sizeof(u32),
- .digestsize = sizeof(u32),
-
- .base.cra_ctxsize = sizeof(u32),
- .base.cra_init = crc32c_cra_init,
- .base.cra_name = "crc32c",
- .base.cra_driver_name = "crc32c-arm-ce",
- .base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .base.cra_blocksize = 1,
- .base.cra_module = THIS_MODULE,
-} };
-
-static int __init crc32_pmull_mod_init(void)
-{
- if (elf_hwcap2 & HWCAP2_PMULL) {
- crc32_pmull_algs[0].update = crc32_pmull_update;
- crc32_pmull_algs[1].update = crc32c_pmull_update;
-
- if (elf_hwcap2 & HWCAP2_CRC32) {
- fallback_crc32 = crc32_armv8_le;
- fallback_crc32c = crc32c_armv8_le;
- } else {
- fallback_crc32 = crc32_le;
- fallback_crc32c = __crc32c_le;
- }
- } else if (!(elf_hwcap2 & HWCAP2_CRC32)) {
- return -ENODEV;
- }
-
- return crypto_register_shashes(crc32_pmull_algs,
- ARRAY_SIZE(crc32_pmull_algs));
-}
-
-static void __exit crc32_pmull_mod_exit(void)
-{
- crypto_unregister_shashes(crc32_pmull_algs,
- ARRAY_SIZE(crc32_pmull_algs));
-}
-
-static const struct cpu_feature __maybe_unused crc32_cpu_feature[] = {
- { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
-};
-MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
-
-module_init(crc32_pmull_mod_init);
-module_exit(crc32_pmull_mod_exit);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_DESCRIPTION("Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("crc32");
-MODULE_ALIAS_CRYPTO("crc32c");
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index 0ca5aae1bcc3..01cd4db2ed47 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -43,5 +43,8 @@ ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
CFLAGS_xor-neon.o += $(CC_FLAGS_FPU)
obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
endif
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+obj-$(CONFIG_CRC32_ARCH) += crc32-arm.o
+crc32-arm-y := crc32-glue.o crc32-core.o
diff --git a/arch/arm/crypto/crc32-ce-core.S b/arch/arm/lib/crc32-core.S
similarity index 100%
rename from arch/arm/crypto/crc32-ce-core.S
rename to arch/arm/lib/crc32-core.S
diff --git a/arch/arm/lib/crc32-glue.c b/arch/arm/lib/crc32-glue.c
new file mode 100644
index 000000000000..2d56fb2b0a1c
--- /dev/null
+++ b/arch/arm/lib/crc32-glue.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/crc32.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/simd.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+static DEFINE_STATIC_KEY_FALSE(have_crc32);
+static DEFINE_STATIC_KEY_FALSE(have_pmull);
+
+#define PMULL_MIN_LEN 64 /* min size of buffer for pmull functions */
+
+asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc);
+asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len);
+
+asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc);
+asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len);
+
+static u32 crc32_le_scalar(u32 crc, const u8 *p, size_t len)
+{
+ if (static_branch_likely(&have_crc32))
+ return crc32_armv8_le(crc, p, len);
+ return crc32_le_base(crc, p, len);
+}
+
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ if (len >= PMULL_MIN_LEN + 15 &&
+ crypto_simd_usable() && static_branch_likely(&have_pmull)) {
+ size_t n = -(uintptr_t)p & 15;
+
+ /* align p to 16-byte boundary */
+ if (n) {
+ crc = crc32_le_scalar(crc, p, n);
+ p += n;
+ len -= n;
+ }
+ n = round_down(len, 16);
+ kernel_neon_begin();
+ crc = crc32_pmull_le(p, n, crc);
+ kernel_neon_end();
+ p += n;
+ len -= n;
+ }
+ return crc32_le_scalar(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+static u32 crc32c_le_scalar(u32 crc, const u8 *p, size_t len)
+{
+ if (static_branch_likely(&have_crc32))
+ return crc32c_armv8_le(crc, p, len);
+ return crc32c_le_base(crc, p, len);
+}
+
+u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ if (len >= PMULL_MIN_LEN + 15 &&
+ crypto_simd_usable() && static_branch_likely(&have_pmull)) {
+ size_t n = -(uintptr_t)p & 15;
+
+ /* align p to 16-byte boundary */
+ if (n) {
+ crc = crc32c_le_scalar(crc, p, n);
+ p += n;
+ len -= n;
+ }
+ n = round_down(len, 16);
+ kernel_neon_begin();
+ crc = crc32c_pmull_le(p, n, crc);
+ kernel_neon_end();
+ p += n;
+ len -= n;
+ }
+ return crc32c_le_scalar(crc, p, len);
+}
+EXPORT_SYMBOL(crc32c_le_arch);
+
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+ return crc32_be_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+static int __init crc32_arm_init(void)
+{
+ if (elf_hwcap2 & HWCAP2_CRC32)
+ static_branch_enable(&have_crc32);
+ if (elf_hwcap2 & HWCAP2_PMULL)
+ static_branch_enable(&have_pmull);
+ return 0;
+}
+arch_initcall(crc32_arm_init);
+
+static void __exit crc32_arm_exit(void)
+{
+}
+module_exit(crc32_arm_exit);
+
+u32 crc32_optimizations(void)
+{
+ if (elf_hwcap2 & (HWCAP2_CRC32 | HWCAP2_PMULL))
+ return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION;
+ return 0;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_DESCRIPTION("Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions");
+MODULE_LICENSE("GPL v2");
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH v3 05/18] arm/crc32: expose CRC32 functions through lib
2024-11-03 22:31 ` [PATCH v3 05/18] arm/crc32: expose CRC32 functions through lib Eric Biggers
@ 2024-11-04 18:10 ` Eric Biggers
0 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-04 18:10 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
On Sun, Nov 03, 2024 at 02:31:41PM -0800, Eric Biggers wrote:
> -static int __init crc32_pmull_mod_init(void)
> -{
> - if (elf_hwcap2 & HWCAP2_PMULL) {
> - crc32_pmull_algs[0].update = crc32_pmull_update;
> - crc32_pmull_algs[1].update = crc32c_pmull_update;
> -
> - if (elf_hwcap2 & HWCAP2_CRC32) {
> - fallback_crc32 = crc32_armv8_le;
> - fallback_crc32c = crc32c_armv8_le;
> - } else {
> - fallback_crc32 = crc32_le;
> - fallback_crc32c = __crc32c_le;
> - }
> - } else if (!(elf_hwcap2 & HWCAP2_CRC32)) {
> - return -ENODEV;
> - }
[...]
> +static u32 crc32_le_scalar(u32 crc, const u8 *p, size_t len)
> +{
> + if (static_branch_likely(&have_crc32))
> + return crc32_armv8_le(crc, p, len);
> + return crc32_le_base(crc, p, len);
> +}
kernel test robot reported a build error on this patch,
"undefined symbol: __kcfi_typeid_crc32_armv8_le". It's because crc32-core.S
uses SYM_TYPED_FUNC_START(crc32_armv8_le), and this patch makes crc32_armv8_le
be called only via direct calls, not indirect calls as it was before. I will
fix this by replacing SYM_TYPED_FUNC_START by SYM_FUNC_START.
- Eric
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v3 06/18] loongarch/crc32: expose CRC32 functions through lib
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (4 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 05/18] arm/crc32: expose CRC32 functions through lib Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-03 22:31 ` [PATCH v3 07/18] mips/crc32: " Eric Biggers
` (11 subsequent siblings)
17 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Move the loongarch CRC32 assembly code into the lib directory and wire
it up to the library interface. This allows it to be used without going
through the crypto API. It remains usable via the crypto API too via
the shash algorithms that use the library interface. Thus all the
arch-specific "shash" code becomes unnecessary and is removed.
Note: to see the diff from arch/loongarch/crypto/crc32-loongarch.c to
arch/loongarch/lib/crc32-loongarch.c, view this commit with
'git show -M10'.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/loongarch/Kconfig | 1 +
arch/loongarch/configs/loongson3_defconfig | 1 -
arch/loongarch/crypto/Kconfig | 9 -
arch/loongarch/crypto/Makefile | 2 -
arch/loongarch/crypto/crc32-loongarch.c | 300 ---------------------
arch/loongarch/lib/Makefile | 2 +
arch/loongarch/lib/crc32-loongarch.c | 135 ++++++++++
7 files changed, 138 insertions(+), 312 deletions(-)
delete mode 100644 arch/loongarch/crypto/crc32-loongarch.c
create mode 100644 arch/loongarch/lib/crc32-loongarch.c
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index bb35c34f86d2..455f1af0bf88 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -13,10 +13,11 @@ config LOONGARCH
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CPU_FINALIZE_INIT
+ select ARCH_HAS_CRC32
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_KCOV
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 75b366407a60..0487ac21b38b 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -967,11 +967,10 @@ CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_CRC32_LOONGARCH=m
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_DMA_CMA=y
CONFIG_DMA_NUMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
CONFIG_PRINTK_TIME=y
diff --git a/arch/loongarch/crypto/Kconfig b/arch/loongarch/crypto/Kconfig
index 200a6e8b43b1..a0270b3e5b30 100644
--- a/arch/loongarch/crypto/Kconfig
+++ b/arch/loongarch/crypto/Kconfig
@@ -1,14 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
menu "Accelerated Cryptographic Algorithms for CPU (loongarch)"
-config CRYPTO_CRC32_LOONGARCH
- tristate "CRC32c and CRC32"
- select CRC32
- select CRYPTO_HASH
- help
- CRC32c and CRC32 CRC algorithms
-
- Architecture: LoongArch with CRC32 instructions
-
endmenu
diff --git a/arch/loongarch/crypto/Makefile b/arch/loongarch/crypto/Makefile
index d22613d27ce9..ba83755dde2b 100644
--- a/arch/loongarch/crypto/Makefile
+++ b/arch/loongarch/crypto/Makefile
@@ -1,6 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for LoongArch crypto files..
#
-
-obj-$(CONFIG_CRYPTO_CRC32_LOONGARCH) += crc32-loongarch.o
diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypto/crc32-loongarch.c
deleted file mode 100644
index b7d9782827f5..000000000000
--- a/arch/loongarch/crypto/crc32-loongarch.c
+++ /dev/null
@@ -1,300 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * crc32.c - CRC32 and CRC32C using LoongArch crc* instructions
- *
- * Module based on mips/crypto/crc32-mips.c
- *
- * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
- * Copyright (C) 2018 MIPS Tech, LLC
- * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
- */
-
-#include <linux/module.h>
-#include <crypto/internal/hash.h>
-
-#include <asm/cpu-features.h>
-#include <linux/unaligned.h>
-
-#define _CRC32(crc, value, size, type) \
-do { \
- __asm__ __volatile__( \
- #type ".w." #size ".w" " %0, %1, %0\n\t"\
- : "+r" (crc) \
- : "r" (value) \
- : "memory"); \
-} while (0)
-
-#define CRC32(crc, value, size) _CRC32(crc, value, size, crc)
-#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc)
-
-static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
-{
- u32 crc = crc_;
-
- while (len >= sizeof(u64)) {
- u64 value = get_unaligned_le64(p);
-
- CRC32(crc, value, d);
- p += sizeof(u64);
- len -= sizeof(u64);
- }
-
- if (len & sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32(crc, value, w);
- p += sizeof(u32);
- }
-
- if (len & sizeof(u16)) {
- u16 value = get_unaligned_le16(p);
-
- CRC32(crc, value, h);
- p += sizeof(u16);
- }
-
- if (len & sizeof(u8)) {
- u8 value = *p++;
-
- CRC32(crc, value, b);
- }
-
- return crc;
-}
-
-static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len)
-{
- u32 crc = crc_;
-
- while (len >= sizeof(u64)) {
- u64 value = get_unaligned_le64(p);
-
- CRC32C(crc, value, d);
- p += sizeof(u64);
- len -= sizeof(u64);
- }
-
- if (len & sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32C(crc, value, w);
- p += sizeof(u32);
- }
-
- if (len & sizeof(u16)) {
- u16 value = get_unaligned_le16(p);
-
- CRC32C(crc, value, h);
- p += sizeof(u16);
- }
-
- if (len & sizeof(u8)) {
- u8 value = *p++;
-
- CRC32C(crc, value, b);
- }
-
- return crc;
-}
-
-#define CHKSUM_BLOCK_SIZE 1
-#define CHKSUM_DIGEST_SIZE 4
-
-struct chksum_ctx {
- u32 key;
-};
-
-struct chksum_desc_ctx {
- u32 crc;
-};
-
-static int chksum_init(struct shash_desc *desc)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = mctx->key;
-
- return 0;
-}
-
-/*
- * Setting the seed allows arbitrary accumulators and flexible XOR policy
- * If your algorithm starts with ~0, then XOR with ~0 before you set the seed.
- */
-static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
-
- if (keylen != sizeof(mctx->key))
- return -EINVAL;
-
- mctx->key = get_unaligned_le32(key);
-
- return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = crc32_loongarch_hw(ctx->crc, data, length);
- return 0;
-}
-
-static int chksumc_update(struct shash_desc *desc, const u8 *data, unsigned int length)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = crc32c_loongarch_hw(ctx->crc, data, length);
- return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- put_unaligned_le32(ctx->crc, out);
- return 0;
-}
-
-static int chksumc_final(struct shash_desc *desc, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- put_unaligned_le32(~ctx->crc, out);
- return 0;
-}
-
-static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
-{
- put_unaligned_le32(crc32_loongarch_hw(crc, data, len), out);
- return 0;
-}
-
-static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
-{
- put_unaligned_le32(~crc32c_loongarch_hw(crc, data, len), out);
- return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- return __chksum_finup(ctx->crc, data, len, out);
-}
-
-static int chksumc_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- return __chksumc_finup(ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
-
- return __chksum_finup(mctx->key, data, length, out);
-}
-
-static int chksumc_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
-
- return __chksumc_finup(mctx->key, data, length, out);
-}
-
-static int chksum_cra_init(struct crypto_tfm *tfm)
-{
- struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
-
- mctx->key = 0;
- return 0;
-}
-
-static int chksumc_cra_init(struct crypto_tfm *tfm)
-{
- struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
-
- mctx->key = ~0;
- return 0;
-}
-
-static struct shash_alg crc32_alg = {
- .digestsize = CHKSUM_DIGEST_SIZE,
- .setkey = chksum_setkey,
- .init = chksum_init,
- .update = chksum_update,
- .final = chksum_final,
- .finup = chksum_finup,
- .digest = chksum_digest,
- .descsize = sizeof(struct chksum_desc_ctx),
- .base = {
- .cra_name = "crc32",
- .cra_driver_name = "crc32-loongarch",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct chksum_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = chksum_cra_init,
- }
-};
-
-static struct shash_alg crc32c_alg = {
- .digestsize = CHKSUM_DIGEST_SIZE,
- .setkey = chksum_setkey,
- .init = chksum_init,
- .update = chksumc_update,
- .final = chksumc_final,
- .finup = chksumc_finup,
- .digest = chksumc_digest,
- .descsize = sizeof(struct chksum_desc_ctx),
- .base = {
- .cra_name = "crc32c",
- .cra_driver_name = "crc32c-loongarch",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct chksum_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = chksumc_cra_init,
- }
-};
-
-static int __init crc32_mod_init(void)
-{
- int err;
-
- if (!cpu_has(CPU_FEATURE_CRC32))
- return 0;
-
- err = crypto_register_shash(&crc32_alg);
- if (err)
- return err;
-
- err = crypto_register_shash(&crc32c_alg);
- if (err)
- return err;
-
- return 0;
-}
-
-static void __exit crc32_mod_exit(void)
-{
- if (!cpu_has(CPU_FEATURE_CRC32))
- return;
-
- crypto_unregister_shash(&crc32_alg);
- crypto_unregister_shash(&crc32c_alg);
-}
-
-module_init(crc32_mod_init);
-module_exit(crc32_mod_exit);
-
-MODULE_AUTHOR("Min Zhou <zhoumin@loongson.cn>");
-MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>");
-MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions");
-MODULE_LICENSE("GPL v2");
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index ccea3bbd4353..fae77809048b 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -9,5 +9,7 @@ lib-y += delay.o memset.o memcpy.o memmove.o \
obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o
obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+obj-$(CONFIG_CRC32_ARCH) += crc32-loongarch.o
diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c
new file mode 100644
index 000000000000..ce862e398eb3
--- /dev/null
+++ b/arch/loongarch/lib/crc32-loongarch.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CRC32 and CRC32C using LoongArch crc* instructions
+ *
+ * Module based on mips/crypto/crc32-mips.c
+ *
+ * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
+ * Copyright (C) 2018 MIPS Tech, LLC
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <asm/cpu-features.h>
+#include <linux/crc32.h>
+#include <linux/module.h>
+#include <linux/unaligned.h>
+
+#define _CRC32(crc, value, size, type) \
+do { \
+ __asm__ __volatile__( \
+ #type ".w." #size ".w" " %0, %1, %0\n\t"\
+ : "+r" (crc) \
+ : "r" (value) \
+ : "memory"); \
+} while (0)
+
+#define CRC32(crc, value, size) _CRC32(crc, value, size, crc)
+#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc)
+
+static DEFINE_STATIC_KEY_FALSE(have_crc32);
+
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ if (!static_branch_likely(&have_crc32))
+ return crc32_le_base(crc, p, len);
+
+ while (len >= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
+
+ CRC32(crc, value, d);
+ p += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len & sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32(crc, value, w);
+ p += sizeof(u32);
+ }
+
+ if (len & sizeof(u16)) {
+ u16 value = get_unaligned_le16(p);
+
+ CRC32(crc, value, h);
+ p += sizeof(u16);
+ }
+
+ if (len & sizeof(u8)) {
+ u8 value = *p++;
+
+ CRC32(crc, value, b);
+ }
+
+ return crc;
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ if (!static_branch_likely(&have_crc32))
+ return crc32c_le_base(crc, p, len);
+
+ while (len >= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
+
+ CRC32C(crc, value, d);
+ p += sizeof(u64);
+ len -= sizeof(u64);
+ }
+
+ if (len & sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32C(crc, value, w);
+ p += sizeof(u32);
+ }
+
+ if (len & sizeof(u16)) {
+ u16 value = get_unaligned_le16(p);
+
+ CRC32C(crc, value, h);
+ p += sizeof(u16);
+ }
+
+ if (len & sizeof(u8)) {
+ u8 value = *p++;
+
+ CRC32C(crc, value, b);
+ }
+
+ return crc;
+}
+EXPORT_SYMBOL(crc32c_le_arch);
+
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+ return crc32_be_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+static int __init crc32_loongarch_init(void)
+{
+ if (cpu_has(CPU_FEATURE_CRC32))
+ static_branch_enable(&have_crc32);
+ return 0;
+}
+arch_initcall(crc32_loongarch_init);
+
+static void __exit crc32_loongarch_exit(void)
+{
+}
+module_exit(crc32_loongarch_exit);
+
+u32 crc32_optimizations(void)
+{
+ if (static_key_enabled(&have_crc32))
+ return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION;
+ return 0;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_AUTHOR("Min Zhou <zhoumin@loongson.cn>");
+MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>");
+MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions");
+MODULE_LICENSE("GPL v2");
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH v3 07/18] mips/crc32: expose CRC32 functions through lib
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (5 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 06/18] loongarch/crc32: " Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-03 22:31 ` [PATCH v3 08/18] powerpc/crc32: " Eric Biggers
` (10 subsequent siblings)
17 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Move the mips CRC32 assembly code into the lib directory and wire it up
to the library interface. This allows it to be used without going
through the crypto API. It remains usable via the crypto API too via
the shash algorithms that use the library interface. Thus all the
arch-specific "shash" code becomes unnecessary and is removed.
Note: to see the diff from arch/mips/crypto/crc32-mips.c to
arch/mips/lib/crc32-mips.c, view this commit with 'git show -M10'.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/mips/Kconfig | 5 +-
arch/mips/configs/eyeq5_defconfig | 1 -
arch/mips/configs/eyeq6_defconfig | 1 -
arch/mips/configs/generic/32r6.config | 2 -
arch/mips/configs/generic/64r6.config | 1 -
arch/mips/crypto/Kconfig | 9 -
arch/mips/crypto/Makefile | 2 -
arch/mips/crypto/crc32-mips.c | 354 --------------------------
arch/mips/lib/Makefile | 2 +
arch/mips/lib/crc32-mips.c | 192 ++++++++++++++
10 files changed, 195 insertions(+), 374 deletions(-)
delete mode 100644 arch/mips/crypto/crc32-mips.c
create mode 100644 arch/mips/lib/crc32-mips.c
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 397edf05dd72..f80ea80d792f 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1993,15 +1993,15 @@ config CPU_MIPSR5
select MIPS_SPRAM
config CPU_MIPSR6
bool
default y if CPU_MIPS32_R6 || CPU_MIPS64_R6
+ select ARCH_HAS_CRC32
select CPU_HAS_RIXI
select CPU_HAS_DIEI if !CPU_DIEI_BROKEN
select HAVE_ARCH_BITREVERSE
select MIPS_ASID_BITS_VARIABLE
- select MIPS_CRC_SUPPORT
select MIPS_SPRAM
config TARGET_ISA_REV
int
default 1 if CPU_MIPSR1
@@ -2473,13 +2473,10 @@ config MIPS_ASID_BITS
default 8
config MIPS_ASID_BITS_VARIABLE
bool
-config MIPS_CRC_SUPPORT
- bool
-
# R4600 erratum. Due to the lack of errata information the exact
# technical details aren't known. I've experimentally found that disabling
# interrupts during indexed I-cache flushes seems to be sufficient to deal
# with the issue.
config WAR_R4600_V1_INDEX_ICACHEOP
diff --git a/arch/mips/configs/eyeq5_defconfig b/arch/mips/configs/eyeq5_defconfig
index ae9a09b16e40..ff7af5dc6d9d 100644
--- a/arch/mips/configs/eyeq5_defconfig
+++ b/arch/mips/configs/eyeq5_defconfig
@@ -97,11 +97,10 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y
CONFIG_NFS_V4_2=y
CONFIG_ROOT_NFS=y
-CONFIG_CRYPTO_CRC32_MIPS=y
CONFIG_FRAME_WARN=1024
CONFIG_DEBUG_FS=y
# CONFIG_RCU_TRACE is not set
# CONFIG_FTRACE is not set
CONFIG_CMDLINE_BOOL=y
diff --git a/arch/mips/configs/eyeq6_defconfig b/arch/mips/configs/eyeq6_defconfig
index 6597d5e88b33..0afbb45a78e8 100644
--- a/arch/mips/configs/eyeq6_defconfig
+++ b/arch/mips/configs/eyeq6_defconfig
@@ -100,11 +100,10 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_NFS_V4_1=y
CONFIG_NFS_V4_2=y
CONFIG_ROOT_NFS=y
-CONFIG_CRYPTO_CRC32_MIPS=y
CONFIG_FRAME_WARN=1024
CONFIG_DEBUG_FS=y
# CONFIG_RCU_TRACE is not set
# CONFIG_FTRACE is not set
CONFIG_CMDLINE_BOOL=y
diff --git a/arch/mips/configs/generic/32r6.config b/arch/mips/configs/generic/32r6.config
index 1a5d5ea4ab2b..ca606e71f4d0 100644
--- a/arch/mips/configs/generic/32r6.config
+++ b/arch/mips/configs/generic/32r6.config
@@ -1,4 +1,2 @@
CONFIG_CPU_MIPS32_R6=y
CONFIG_HIGHMEM=y
-
-CONFIG_CRYPTO_CRC32_MIPS=y
diff --git a/arch/mips/configs/generic/64r6.config b/arch/mips/configs/generic/64r6.config
index 63b4e95f303d..23a300914957 100644
--- a/arch/mips/configs/generic/64r6.config
+++ b/arch/mips/configs/generic/64r6.config
@@ -2,7 +2,6 @@ CONFIG_CPU_MIPS64_R6=y
CONFIG_64BIT=y
CONFIG_MIPS32_O32=y
CONFIG_MIPS32_N32=y
CONFIG_CPU_HAS_MSA=y
-CONFIG_CRYPTO_CRC32_MIPS=y
CONFIG_VIRTUALIZATION=y
diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig
index 9003a5c1e879..7decd40c4e20 100644
--- a/arch/mips/crypto/Kconfig
+++ b/arch/mips/crypto/Kconfig
@@ -1,18 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
menu "Accelerated Cryptographic Algorithms for CPU (mips)"
-config CRYPTO_CRC32_MIPS
- tristate "CRC32c and CRC32"
- depends on MIPS_CRC_SUPPORT
- select CRYPTO_HASH
- help
- CRC32c and CRC32 CRC algorithms
-
- Architecture: mips
-
config CRYPTO_POLY1305_MIPS
tristate "Hash functions: Poly1305"
depends on MIPS
select CRYPTO_ARCH_HAVE_LIB_POLY1305
help
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
index 5e4105cccf9f..fddc88281412 100644
--- a/arch/mips/crypto/Makefile
+++ b/arch/mips/crypto/Makefile
@@ -1,12 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for MIPS crypto files..
#
-obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
-
obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
chacha-mips-y := chacha-core.o chacha-glue.o
AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c
deleted file mode 100644
index 90eacf00cfc3..000000000000
--- a/arch/mips/crypto/crc32-mips.c
+++ /dev/null
@@ -1,354 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions
- *
- * Module based on arm64/crypto/crc32-arm.c
- *
- * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
- * Copyright (C) 2018 MIPS Tech, LLC
- */
-
-#include <linux/cpufeature.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <asm/mipsregs.h>
-#include <linux/unaligned.h>
-
-#include <crypto/internal/hash.h>
-
-enum crc_op_size {
- b, h, w, d,
-};
-
-enum crc_type {
- crc32,
- crc32c,
-};
-
-#ifndef TOOLCHAIN_SUPPORTS_CRC
-#define _ASM_SET_CRC(OP, SZ, TYPE) \
-_ASM_MACRO_3R(OP, rt, rs, rt2, \
- ".ifnc \\rt, \\rt2\n\t" \
- ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \
- ".endif\n\t" \
- _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \
- ((SZ) << 6) | ((TYPE) << 8)) \
- _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \
- ((SZ) << 14) | ((TYPE) << 3)))
-#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t"
-#else /* !TOOLCHAIN_SUPPORTS_CRC */
-#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t"
-#define _ASM_UNSET_CRC(op, SZ, TYPE)
-#endif
-
-#define __CRC32(crc, value, op, SZ, TYPE) \
-do { \
- __asm__ __volatile__( \
- ".set push\n\t" \
- _ASM_SET_CRC(op, SZ, TYPE) \
- #op " %0, %1, %0\n\t" \
- _ASM_UNSET_CRC(op, SZ, TYPE) \
- ".set pop" \
- : "+r" (crc) \
- : "r" (value)); \
-} while (0)
-
-#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0)
-#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0)
-#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0)
-#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0)
-#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1)
-#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1)
-#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1)
-#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1)
-
-#define _CRC32(crc, value, size, op) \
- _CRC32_##op##size(crc, value)
-
-#define CRC32(crc, value, size) \
- _CRC32(crc, value, size, crc32)
-
-#define CRC32C(crc, value, size) \
- _CRC32(crc, value, size, crc32c)
-
-static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
-{
- u32 crc = crc_;
-
- if (IS_ENABLED(CONFIG_64BIT)) {
- for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) {
- u64 value = get_unaligned_le64(p);
-
- CRC32(crc, value, d);
- }
-
- if (len & sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32(crc, value, w);
- p += sizeof(u32);
- }
- } else {
- for (; len >= sizeof(u32); len -= sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32(crc, value, w);
- p += sizeof(u32);
- }
- }
-
- if (len & sizeof(u16)) {
- u16 value = get_unaligned_le16(p);
-
- CRC32(crc, value, h);
- p += sizeof(u16);
- }
-
- if (len & sizeof(u8)) {
- u8 value = *p++;
-
- CRC32(crc, value, b);
- }
-
- return crc;
-}
-
-static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned int len)
-{
- u32 crc = crc_;
-
- if (IS_ENABLED(CONFIG_64BIT)) {
- for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) {
- u64 value = get_unaligned_le64(p);
-
- CRC32C(crc, value, d);
- }
-
- if (len & sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32C(crc, value, w);
- p += sizeof(u32);
- }
- } else {
- for (; len >= sizeof(u32); len -= sizeof(u32)) {
- u32 value = get_unaligned_le32(p);
-
- CRC32C(crc, value, w);
- p += sizeof(u32);
- }
- }
-
- if (len & sizeof(u16)) {
- u16 value = get_unaligned_le16(p);
-
- CRC32C(crc, value, h);
- p += sizeof(u16);
- }
-
- if (len & sizeof(u8)) {
- u8 value = *p++;
-
- CRC32C(crc, value, b);
- }
- return crc;
-}
-
-#define CHKSUM_BLOCK_SIZE 1
-#define CHKSUM_DIGEST_SIZE 4
-
-struct chksum_ctx {
- u32 key;
-};
-
-struct chksum_desc_ctx {
- u32 crc;
-};
-
-static int chksum_init(struct shash_desc *desc)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = mctx->key;
-
- return 0;
-}
-
-/*
- * Setting the seed allows arbitrary accumulators and flexible XOR policy
- * If your algorithm starts with ~0, then XOR with ~0 before you set
- * the seed.
- */
-static int chksum_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(tfm);
-
- if (keylen != sizeof(mctx->key))
- return -EINVAL;
- mctx->key = get_unaligned_le32(key);
- return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = crc32_mips_le_hw(ctx->crc, data, length);
- return 0;
-}
-
-static int chksumc_update(struct shash_desc *desc, const u8 *data,
- unsigned int length)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = crc32c_mips_le_hw(ctx->crc, data, length);
- return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- put_unaligned_le32(ctx->crc, out);
- return 0;
-}
-
-static int chksumc_final(struct shash_desc *desc, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- put_unaligned_le32(~ctx->crc, out);
- return 0;
-}
-
-static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
-{
- put_unaligned_le32(crc32_mips_le_hw(crc, data, len), out);
- return 0;
-}
-
-static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *out)
-{
- put_unaligned_le32(~crc32c_mips_le_hw(crc, data, len), out);
- return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- return __chksum_finup(ctx->crc, data, len, out);
-}
-
-static int chksumc_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- return __chksumc_finup(ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
- unsigned int length, u8 *out)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
-
- return __chksum_finup(mctx->key, data, length, out);
-}
-
-static int chksumc_digest(struct shash_desc *desc, const u8 *data,
- unsigned int length, u8 *out)
-{
- struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm);
-
- return __chksumc_finup(mctx->key, data, length, out);
-}
-
-static int chksum_cra_init(struct crypto_tfm *tfm)
-{
- struct chksum_ctx *mctx = crypto_tfm_ctx(tfm);
-
- mctx->key = ~0;
- return 0;
-}
-
-static struct shash_alg crc32_alg = {
- .digestsize = CHKSUM_DIGEST_SIZE,
- .setkey = chksum_setkey,
- .init = chksum_init,
- .update = chksum_update,
- .final = chksum_final,
- .finup = chksum_finup,
- .digest = chksum_digest,
- .descsize = sizeof(struct chksum_desc_ctx),
- .base = {
- .cra_name = "crc32",
- .cra_driver_name = "crc32-mips-hw",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct chksum_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = chksum_cra_init,
- }
-};
-
-static struct shash_alg crc32c_alg = {
- .digestsize = CHKSUM_DIGEST_SIZE,
- .setkey = chksum_setkey,
- .init = chksum_init,
- .update = chksumc_update,
- .final = chksumc_final,
- .finup = chksumc_finup,
- .digest = chksumc_digest,
- .descsize = sizeof(struct chksum_desc_ctx),
- .base = {
- .cra_name = "crc32c",
- .cra_driver_name = "crc32c-mips-hw",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct chksum_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = chksum_cra_init,
- }
-};
-
-static int __init crc32_mod_init(void)
-{
- int err;
-
- err = crypto_register_shash(&crc32_alg);
-
- if (err)
- return err;
-
- err = crypto_register_shash(&crc32c_alg);
-
- if (err) {
- crypto_unregister_shash(&crc32_alg);
- return err;
- }
-
- return 0;
-}
-
-static void __exit crc32_mod_exit(void)
-{
- crypto_unregister_shash(&crc32_alg);
- crypto_unregister_shash(&crc32c_alg);
-}
-
-MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@mips.com");
-MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions");
-MODULE_LICENSE("GPL v2");
-
-module_cpu_feature_match(MIPS_CRC32, crc32_mod_init);
-module_exit(crc32_mod_exit);
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 5d5b993cbc2b..9c024e6d5e54 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -12,7 +12,9 @@ obj-$(CONFIG_PCI) += iomap-pci.o
lib-$(CONFIG_GENERIC_CSUM) := $(filter-out csum_partial.o, $(lib-y))
obj-$(CONFIG_CPU_GENERIC_DUMP_TLB) += dump_tlb.o
obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o
+obj-$(CONFIG_CRC32_ARCH) += crc32-mips.o
+
# libgcc-style stuff needed in the kernel
obj-y += bswapsi.o bswapdi.o multi3.o
diff --git a/arch/mips/lib/crc32-mips.c b/arch/mips/lib/crc32-mips.c
new file mode 100644
index 000000000000..083e5d693a16
--- /dev/null
+++ b/arch/mips/lib/crc32-mips.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions
+ *
+ * Module based on arm64/crypto/crc32-arm.c
+ *
+ * Copyright (C) 2014 Linaro Ltd <yazen.ghannam@linaro.org>
+ * Copyright (C) 2018 MIPS Tech, LLC
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/crc32.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/mipsregs.h>
+#include <linux/unaligned.h>
+
+enum crc_op_size {
+ b, h, w, d,
+};
+
+enum crc_type {
+ crc32,
+ crc32c,
+};
+
+#ifndef TOOLCHAIN_SUPPORTS_CRC
+#define _ASM_SET_CRC(OP, SZ, TYPE) \
+_ASM_MACRO_3R(OP, rt, rs, rt2, \
+ ".ifnc \\rt, \\rt2\n\t" \
+ ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \
+ ".endif\n\t" \
+ _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \
+ ((SZ) << 6) | ((TYPE) << 8)) \
+ _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \
+ ((SZ) << 14) | ((TYPE) << 3)))
+#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t"
+#else /* !TOOLCHAIN_SUPPORTS_CRC */
+#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t"
+#define _ASM_UNSET_CRC(op, SZ, TYPE)
+#endif
+
+#define __CRC32(crc, value, op, SZ, TYPE) \
+do { \
+ __asm__ __volatile__( \
+ ".set push\n\t" \
+ _ASM_SET_CRC(op, SZ, TYPE) \
+ #op " %0, %1, %0\n\t" \
+ _ASM_UNSET_CRC(op, SZ, TYPE) \
+ ".set pop" \
+ : "+r" (crc) \
+ : "r" (value)); \
+} while (0)
+
+#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0)
+#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0)
+#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0)
+#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0)
+#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1)
+#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1)
+#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1)
+#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1)
+
+#define _CRC32(crc, value, size, op) \
+ _CRC32_##op##size(crc, value)
+
+#define CRC32(crc, value, size) \
+ _CRC32(crc, value, size, crc32)
+
+#define CRC32C(crc, value, size) \
+ _CRC32(crc, value, size, crc32c)
+
+static DEFINE_STATIC_KEY_FALSE(have_crc32);
+
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ if (!static_branch_likely(&have_crc32))
+ return crc32_le_base(crc, p, len);
+
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
+
+ CRC32(crc, value, d);
+ }
+
+ if (len & sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32(crc, value, w);
+ p += sizeof(u32);
+ }
+ } else {
+ for (; len >= sizeof(u32); len -= sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32(crc, value, w);
+ p += sizeof(u32);
+ }
+ }
+
+ if (len & sizeof(u16)) {
+ u16 value = get_unaligned_le16(p);
+
+ CRC32(crc, value, h);
+ p += sizeof(u16);
+ }
+
+ if (len & sizeof(u8)) {
+ u8 value = *p++;
+
+ CRC32(crc, value, b);
+ }
+
+ return crc;
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ if (!static_branch_likely(&have_crc32))
+ return crc32c_le_base(crc, p, len);
+
+ if (IS_ENABLED(CONFIG_64BIT)) {
+ for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) {
+ u64 value = get_unaligned_le64(p);
+
+ CRC32C(crc, value, d);
+ }
+
+ if (len & sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32C(crc, value, w);
+ p += sizeof(u32);
+ }
+ } else {
+ for (; len >= sizeof(u32); len -= sizeof(u32)) {
+ u32 value = get_unaligned_le32(p);
+
+ CRC32C(crc, value, w);
+ p += sizeof(u32);
+ }
+ }
+
+ if (len & sizeof(u16)) {
+ u16 value = get_unaligned_le16(p);
+
+ CRC32C(crc, value, h);
+ p += sizeof(u16);
+ }
+
+ if (len & sizeof(u8)) {
+ u8 value = *p++;
+
+ CRC32C(crc, value, b);
+ }
+ return crc;
+}
+EXPORT_SYMBOL(crc32c_le_arch);
+
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+ return crc32_be_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+static int __init crc32_mips_init(void)
+{
+ if (cpu_have_feature(cpu_feature(MIPS_CRC32)))
+ static_branch_enable(&have_crc32);
+ return 0;
+}
+arch_initcall(crc32_mips_init);
+
+static void __exit crc32_mips_exit(void)
+{
+}
+module_exit(crc32_mips_exit);
+
+u32 crc32_optimizations(void)
+{
+ if (static_key_enabled(&have_crc32))
+ return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION;
+ return 0;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_AUTHOR("Marcin Nowakowski <marcin.nowakowski@mips.com");
+MODULE_DESCRIPTION("CRC32 and CRC32C using optional MIPS instructions");
+MODULE_LICENSE("GPL v2");
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH v3 08/18] powerpc/crc32: expose CRC32 functions through lib
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (6 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 07/18] mips/crc32: " Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-05 9:22 ` Michael Ellerman
2024-11-03 22:31 ` [PATCH v3 09/18] s390/crc32: " Eric Biggers
` (9 subsequent siblings)
17 siblings, 1 reply; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Move the powerpc CRC32C assembly code into the lib directory and wire it
up to the library interface. This allows it to be used without going
through the crypto API. It remains usable via the crypto API too via
the shash algorithms that use the library interface. Thus all the
arch-specific "shash" code becomes unnecessary and is removed.
Note: to see the diff from arch/powerpc/crypto/crc32c-vpmsum_glue.c to
arch/powerpc/lib/crc32-glue.c, view this commit with 'git show -M10'.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/powerpc/Kconfig | 1 +
arch/powerpc/configs/powernv_defconfig | 1 -
arch/powerpc/configs/ppc64_defconfig | 1 -
arch/powerpc/crypto/Kconfig | 15 +-
arch/powerpc/crypto/Makefile | 2 -
arch/powerpc/crypto/crc32c-vpmsum_glue.c | 173 ------------------
arch/powerpc/crypto/crct10dif-vpmsum_asm.S | 2 +-
arch/powerpc/lib/Makefile | 3 +
arch/powerpc/lib/crc32-glue.c | 92 ++++++++++
.../{crypto => lib}/crc32-vpmsum_core.S | 0
.../{crypto => lib}/crc32c-vpmsum_asm.S | 0
11 files changed, 98 insertions(+), 192 deletions(-)
delete mode 100644 arch/powerpc/crypto/crc32c-vpmsum_glue.c
create mode 100644 arch/powerpc/lib/crc32-glue.c
rename arch/powerpc/{crypto => lib}/crc32-vpmsum_core.S (100%)
rename arch/powerpc/{crypto => lib}/crc32c-vpmsum_asm.S (100%)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8094a01974cc..b05889400b04 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -125,10 +125,11 @@ config PPC
select ARCH_DISABLE_KASAN_INLINE if PPC_RADIX_MMU
select ARCH_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_COPY_MC if PPC64
+ select ARCH_HAS_CRC32 if PPC64 && ALTIVEC
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX
select ARCH_HAS_DEVMEM_IS_ALLOWED
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index ee84ade7a033..4a7ddea05b4d 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -318,11 +318,10 @@ CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
CONFIG_XMON=y
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_CRC32C_VPMSUM=m
CONFIG_CRYPTO_CRCT10DIF_VPMSUM=m
CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA1_PPC=m
CONFIG_CRYPTO_SHA256=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index a5e3e7f97f4d..ea01c0d6705f 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -388,11 +388,10 @@ CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_LZO=m
-CONFIG_CRYPTO_CRC32C_VPMSUM=m
CONFIG_CRYPTO_CRCT10DIF_VPMSUM=m
CONFIG_CRYPTO_VPMSUM_TESTER=m
CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_SHA1_PPC=m
CONFIG_CRYPTO_AES_GCM_P10=m
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index 46a4c85e85e2..5c016ec39530 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -11,23 +11,10 @@ config CRYPTO_CURVE25519_PPC64
Curve25519 algorithm
Architecture: PowerPC64
- Little-endian
-config CRYPTO_CRC32C_VPMSUM
- tristate "CRC32c"
- depends on PPC64 && ALTIVEC
- select CRYPTO_HASH
- select CRC32
- help
- CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
-
- Architecture: powerpc64 using
- - AltiVec extensions
-
- Enable on POWER8 and newer processors for improved performance.
-
config CRYPTO_CRCT10DIF_VPMSUM
tristate "CRC32T10DIF"
depends on PPC64 && ALTIVEC && CRC_T10DIF
select CRYPTO_HASH
help
@@ -38,11 +25,11 @@ config CRYPTO_CRCT10DIF_VPMSUM
Enable on POWER8 and newer processors for improved performance.
config CRYPTO_VPMSUM_TESTER
tristate "CRC32c and CRC32T10DIF hardware acceleration tester"
- depends on CRYPTO_CRCT10DIF_VPMSUM && CRYPTO_CRC32C_VPMSUM
+ depends on CRYPTO_CRCT10DIF_VPMSUM && CRC32_ARCH
help
Stress test for CRC32c and CRCT10DIF algorithms implemented with
powerpc64 AltiVec extensions (POWER8 vpmsum instructions).
Unless you are testing these algorithms, you don't need this.
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 59808592f0a1..54486192273c 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -8,11 +8,10 @@
obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o
obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
-obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o
obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o
obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o
obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o
obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o
@@ -22,11 +21,10 @@ obj-$(CONFIG_CRYPTO_CURVE25519_PPC64) += curve25519-ppc64le.o
aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
md5-ppc-y := md5-asm.o md5-glue.o
sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
-crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o
chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o
poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o
vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
deleted file mode 100644
index 63760b7dbb76..000000000000
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ /dev/null
@@ -1,173 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/crc32.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/cpufeature.h>
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-
-#define CHKSUM_BLOCK_SIZE 1
-#define CHKSUM_DIGEST_SIZE 4
-
-#define VMX_ALIGN 16
-#define VMX_ALIGN_MASK (VMX_ALIGN-1)
-
-#define VECTOR_BREAKPOINT 512
-
-u32 __crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len);
-
-static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len)
-{
- unsigned int prealign;
- unsigned int tail;
-
- if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable())
- return __crc32c_le(crc, p, len);
-
- if ((unsigned long)p & VMX_ALIGN_MASK) {
- prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
- crc = __crc32c_le(crc, p, prealign);
- len -= prealign;
- p += prealign;
- }
-
- if (len & ~VMX_ALIGN_MASK) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_altivec();
- crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
- disable_kernel_altivec();
- pagefault_enable();
- preempt_enable();
- }
-
- tail = len & VMX_ALIGN_MASK;
- if (tail) {
- p += len & ~VMX_ALIGN_MASK;
- crc = __crc32c_le(crc, p, tail);
- }
-
- return crc;
-}
-
-static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = ~0;
-
- return 0;
-}
-
-/*
- * Setting the seed allows arbitrary accumulators and flexible XOR policy
- * If your algorithm starts with ~0, then XOR with ~0 before you set
- * the seed.
- */
-static int crc32c_vpmsum_setkey(struct crypto_shash *hash, const u8 *key,
- unsigned int keylen)
-{
- u32 *mctx = crypto_shash_ctx(hash);
-
- if (keylen != sizeof(u32))
- return -EINVAL;
- *mctx = le32_to_cpup((__le32 *)key);
- return 0;
-}
-
-static int crc32c_vpmsum_init(struct shash_desc *desc)
-{
- u32 *mctx = crypto_shash_ctx(desc->tfm);
- u32 *crcp = shash_desc_ctx(desc);
-
- *crcp = *mctx;
-
- return 0;
-}
-
-static int crc32c_vpmsum_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- u32 *crcp = shash_desc_ctx(desc);
-
- *crcp = crc32c_vpmsum(*crcp, data, len);
-
- return 0;
-}
-
-static int __crc32c_vpmsum_finup(u32 *crcp, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(__le32 *)out = ~cpu_to_le32(crc32c_vpmsum(*crcp, data, len));
-
- return 0;
-}
-
-static int crc32c_vpmsum_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32c_vpmsum_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32c_vpmsum_final(struct shash_desc *desc, u8 *out)
-{
- u32 *crcp = shash_desc_ctx(desc);
-
- *(__le32 *)out = ~cpu_to_le32p(crcp);
-
- return 0;
-}
-
-static int crc32c_vpmsum_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32c_vpmsum_finup(crypto_shash_ctx(desc->tfm), data, len,
- out);
-}
-
-static struct shash_alg alg = {
- .setkey = crc32c_vpmsum_setkey,
- .init = crc32c_vpmsum_init,
- .update = crc32c_vpmsum_update,
- .final = crc32c_vpmsum_final,
- .finup = crc32c_vpmsum_finup,
- .digest = crc32c_vpmsum_digest,
- .descsize = sizeof(u32),
- .digestsize = CHKSUM_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32c",
- .cra_driver_name = "crc32c-vpmsum",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_ctxsize = sizeof(u32),
- .cra_module = THIS_MODULE,
- .cra_init = crc32c_vpmsum_cra_init,
- }
-};
-
-static int __init crc32c_vpmsum_mod_init(void)
-{
- if (!cpu_has_feature(CPU_FTR_ARCH_207S))
- return -ENODEV;
-
- return crypto_register_shash(&alg);
-}
-
-static void __exit crc32c_vpmsum_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
-module_exit(crc32c_vpmsum_mod_fini);
-
-MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
-MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CRYPTO("crc32c");
-MODULE_ALIAS_CRYPTO("crc32c-vpmsum");
diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S
index f0b93a0fe168..0a52261bf859 100644
--- a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S
+++ b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S
@@ -840,6 +840,6 @@
.octa 0x000000000000000000000001f65a57f8 /* x^64 div p(x) */
/* Barrett constant n */
.octa 0x0000000000000000000000018bb70000
#define CRC_FUNCTION_NAME __crct10dif_vpmsum
-#include "crc32-vpmsum_core.S"
+#include "../lib/crc32-vpmsum_core.S"
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index f14ecab674a3..da9381a1c95b 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -76,6 +76,9 @@ obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o
CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec)
# Enable <altivec.h>
CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include)
+obj-$(CONFIG_CRC32_ARCH) += crc32-powerpc.o
+crc32-powerpc-y := crc32-glue.o crc32c-vpmsum_asm.o
+
obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/lib/crc32-glue.c b/arch/powerpc/lib/crc32-glue.c
new file mode 100644
index 000000000000..e9730f028afb
--- /dev/null
+++ b/arch/powerpc/lib/crc32-glue.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/crc32.h>
+#include <crypto/internal/simd.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+
+#define VMX_ALIGN 16
+#define VMX_ALIGN_MASK (VMX_ALIGN-1)
+
+#define VECTOR_BREAKPOINT 512
+
+static DEFINE_STATIC_KEY_FALSE(have_vec_crypto);
+
+u32 __crc32c_vpmsum(u32 crc, const u8 *p, size_t len);
+
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ return crc32_le_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ unsigned int prealign;
+ unsigned int tail;
+
+ if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable() ||
+ !static_branch_likely(&have_vec_crypto))
+ return crc32c_le_base(crc, p, len);
+
+ if ((unsigned long)p & VMX_ALIGN_MASK) {
+ prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
+ crc = crc32c_le_base(crc, p, prealign);
+ len -= prealign;
+ p += prealign;
+ }
+
+ if (len & ~VMX_ALIGN_MASK) {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_altivec();
+ crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+ disable_kernel_altivec();
+ pagefault_enable();
+ preempt_enable();
+ }
+
+ tail = len & VMX_ALIGN_MASK;
+ if (tail) {
+ p += len & ~VMX_ALIGN_MASK;
+ crc = crc32c_le_base(crc, p, tail);
+ }
+
+ return crc;
+}
+EXPORT_SYMBOL(crc32c_le_arch);
+
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+ return crc32_be_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+static int __init crc32_powerpc_init(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+ (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO))
+ static_branch_enable(&have_vec_crypto);
+ return 0;
+}
+arch_initcall(crc32_powerpc_init);
+
+static void __exit crc32_powerpc_exit(void)
+{
+}
+module_exit(crc32_powerpc_exit);
+
+u32 crc32_optimizations(void)
+{
+ if (static_key_enabled(&have_vec_crypto))
+ return CRC32C_OPTIMIZATION;
+ return 0;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
+MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/lib/crc32-vpmsum_core.S
similarity index 100%
rename from arch/powerpc/crypto/crc32-vpmsum_core.S
rename to arch/powerpc/lib/crc32-vpmsum_core.S
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_asm.S b/arch/powerpc/lib/crc32c-vpmsum_asm.S
similarity index 100%
rename from arch/powerpc/crypto/crc32c-vpmsum_asm.S
rename to arch/powerpc/lib/crc32c-vpmsum_asm.S
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH v3 08/18] powerpc/crc32: expose CRC32 functions through lib
2024-11-03 22:31 ` [PATCH v3 08/18] powerpc/crc32: " Eric Biggers
@ 2024-11-05 9:22 ` Michael Ellerman
0 siblings, 0 replies; 31+ messages in thread
From: Michael Ellerman @ 2024-11-05 9:22 UTC (permalink / raw)
To: Eric Biggers, linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
Eric Biggers <ebiggers@kernel.org> writes:
> From: Eric Biggers <ebiggers@google.com>
>
> Move the powerpc CRC32C assembly code into the lib directory and wire it
> up to the library interface. This allows it to be used without going
> through the crypto API. It remains usable via the crypto API too via
> the shash algorithms that use the library interface. Thus all the
> arch-specific "shash" code becomes unnecessary and is removed.
>
> Note: to see the diff from arch/powerpc/crypto/crc32c-vpmsum_glue.c to
> arch/powerpc/lib/crc32-glue.c, view this commit with 'git show -M10'.
>
> Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
> arch/powerpc/Kconfig | 1 +
> arch/powerpc/configs/powernv_defconfig | 1 -
> arch/powerpc/configs/ppc64_defconfig | 1 -
> arch/powerpc/crypto/Kconfig | 15 +-
> arch/powerpc/crypto/Makefile | 2 -
> arch/powerpc/crypto/crc32c-vpmsum_glue.c | 173 ------------------
> arch/powerpc/crypto/crct10dif-vpmsum_asm.S | 2 +-
> arch/powerpc/lib/Makefile | 3 +
> arch/powerpc/lib/crc32-glue.c | 92 ++++++++++
> .../{crypto => lib}/crc32-vpmsum_core.S | 0
> .../{crypto => lib}/crc32c-vpmsum_asm.S | 0
> 11 files changed, 98 insertions(+), 192 deletions(-)
> delete mode 100644 arch/powerpc/crypto/crc32c-vpmsum_glue.c
> create mode 100644 arch/powerpc/lib/crc32-glue.c
> rename arch/powerpc/{crypto => lib}/crc32-vpmsum_core.S (100%)
> rename arch/powerpc/{crypto => lib}/crc32c-vpmsum_asm.S (100%)
Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
...
> deleted file mode 100644
> index 63760b7dbb76..000000000000
> --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
> +++ /dev/null
> @@ -1,173 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0-only
> -
...
> -static int __init crc32c_vpmsum_mod_init(void)
> -{
> - if (!cpu_has_feature(CPU_FTR_ARCH_207S))
> - return -ENODEV;
> -
> - return crypto_register_shash(&alg);
> -}
> -
> -static void __exit crc32c_vpmsum_mod_fini(void)
> -{
> - crypto_unregister_shash(&alg);
> -}
> -
> -module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
> -module_exit(crc32c_vpmsum_mod_fini);
> -
> -MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
> -MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructions");
> -MODULE_LICENSE("GPL");
> -MODULE_ALIAS_CRYPTO("crc32c");
> -MODULE_ALIAS_CRYPTO("crc32c-vpmsum");
...
> new file mode 100644
> index 000000000000..e9730f028afb
> --- /dev/null
> +++ b/arch/powerpc/lib/crc32-glue.c
> @@ -0,0 +1,92 @@
> +// SPDX-License-Identifier: GPL-2.0-only
...
> +
> +static int __init crc32_powerpc_init(void)
> +{
> + if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
> + (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO))
> + static_branch_enable(&have_vec_crypto);
For any other reviewers, this looks like a new cpu feature check, but
it's not. In the old code there was a module feature check:
module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
And PPC_MODULE_FEATURE_VEC_CRYPTO maps to PPC_FEATURE2_VEC_CRYPTO, so
the logic is equivalent.
cheers
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v3 09/18] s390/crc32: expose CRC32 functions through lib
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (7 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 08/18] powerpc/crc32: " Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-03 22:31 ` [PATCH v3 10/18] sparc/crc32: " Eric Biggers
` (8 subsequent siblings)
17 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Move the s390 CRC32 assembly code into the lib directory and wire it up
to the library interface. This allows it to be used without going
through the crypto API. It remains usable via the crypto API too via
the shash algorithms that use the library interface. Thus all the
arch-specific "shash" code becomes unnecessary and is removed.
Note: to see the diff from arch/s390/crypto/crc32-vx.c to
arch/s390/lib/crc32-glue.c, view this commit with 'git show -M10'.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/s390/Kconfig | 1 +
arch/s390/configs/debug_defconfig | 1 -
arch/s390/configs/defconfig | 1 -
arch/s390/crypto/Kconfig | 12 -
arch/s390/crypto/Makefile | 2 -
arch/s390/crypto/crc32-vx.c | 306 -------------------------
arch/s390/lib/Makefile | 3 +
arch/s390/lib/crc32-glue.c | 92 ++++++++
arch/s390/{crypto => lib}/crc32-vx.h | 0
arch/s390/{crypto => lib}/crc32be-vx.c | 0
arch/s390/{crypto => lib}/crc32le-vx.c | 0
11 files changed, 96 insertions(+), 322 deletions(-)
delete mode 100644 arch/s390/crypto/crc32-vx.c
create mode 100644 arch/s390/lib/crc32-glue.c
rename arch/s390/{crypto => lib}/crc32-vx.h (100%)
rename arch/s390/{crypto => lib}/crc32be-vx.c (100%)
rename arch/s390/{crypto => lib}/crc32le-vx.c (100%)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index d339fe4fdedf..d1fde8b941d2 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -63,10 +63,11 @@ config S390
select ARCH_BINFMT_ELF_STATE
select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE
select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
+ select ARCH_HAS_CRC32
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_DEVMEM_IS_ALLOWED
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index fb0e9a1d9be2..fd83d8958f0b 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -792,11 +792,10 @@ CONFIG_CRYPTO_ZSTD=m
CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA3_256_S390=m
CONFIG_CRYPTO_SHA3_512_S390=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 88be0a734b60..3bdeb6d5cbd9 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -779,11 +779,10 @@ CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_CRYPTO_JITTERENTROPY_OSR=1
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
-CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA3_256_S390=m
CONFIG_CRYPTO_SHA3_512_S390=m
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
index d3eb3a233693..b760232537f1 100644
--- a/arch/s390/crypto/Kconfig
+++ b/arch/s390/crypto/Kconfig
@@ -1,21 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
menu "Accelerated Cryptographic Algorithms for CPU (s390)"
-config CRYPTO_CRC32_S390
- tristate "CRC32c and CRC32"
- depends on S390
- select CRYPTO_HASH
- select CRC32
- help
- CRC32c and CRC32 CRC algorithms
-
- Architecture: s390
-
- It is available with IBM z13 or later.
-
config CRYPTO_SHA512_S390
tristate "Hash functions: SHA-384 and SHA-512"
depends on S390
select CRYPTO_HASH
help
diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile
index a0cb96937c3d..14dafadbcbed 100644
--- a/arch/s390/crypto/Makefile
+++ b/arch/s390/crypto/Makefile
@@ -12,11 +12,9 @@ obj-$(CONFIG_CRYPTO_DES_S390) += des_s390.o
obj-$(CONFIG_CRYPTO_AES_S390) += aes_s390.o
obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o
obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o
obj-$(CONFIG_S390_PRNG) += prng.o
obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o
-obj-$(CONFIG_CRYPTO_CRC32_S390) += crc32-vx_s390.o
obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o
obj-y += arch_random.o
-crc32-vx_s390-y := crc32-vx.o crc32le-vx.o crc32be-vx.o
chacha_s390-y := chacha-glue.o chacha-s390.o
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
deleted file mode 100644
index 89a10337e6ea..000000000000
--- a/arch/s390/crypto/crc32-vx.c
+++ /dev/null
@@ -1,306 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Crypto-API module for CRC-32 algorithms implemented with the
- * z/Architecture Vector Extension Facility.
- *
- * Copyright IBM Corp. 2015
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
- */
-#define KMSG_COMPONENT "crc32-vx"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/cpufeature.h>
-#include <linux/crc32.h>
-#include <crypto/internal/hash.h>
-#include <asm/fpu.h>
-#include "crc32-vx.h"
-
-#define CRC32_BLOCK_SIZE 1
-#define CRC32_DIGEST_SIZE 4
-
-#define VX_MIN_LEN 64
-#define VX_ALIGNMENT 16L
-#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
-
-struct crc_ctx {
- u32 key;
-};
-
-struct crc_desc_ctx {
- u32 crc;
-};
-
-/*
- * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
- *
- * Creates a function to perform a particular CRC-32 computation. Depending
- * on the message buffer, the hardware-accelerated or software implementation
- * is used. Note that the message buffer is aligned to improve fetch
- * operations of VECTOR LOAD MULTIPLE instructions.
- *
- */
-#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \
- static u32 __pure ___fname(u32 crc, \
- unsigned char const *data, size_t datalen) \
- { \
- unsigned long prealign, aligned, remaining; \
- DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \
- \
- if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \
- return ___crc32_sw(crc, data, datalen); \
- \
- if ((unsigned long)data & VX_ALIGN_MASK) { \
- prealign = VX_ALIGNMENT - \
- ((unsigned long)data & VX_ALIGN_MASK); \
- datalen -= prealign; \
- crc = ___crc32_sw(crc, data, prealign); \
- data = (void *)((unsigned long)data + prealign); \
- } \
- \
- aligned = datalen & ~VX_ALIGN_MASK; \
- remaining = datalen & VX_ALIGN_MASK; \
- \
- kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \
- crc = ___crc32_vx(crc, data, aligned); \
- kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \
- \
- if (remaining) \
- crc = ___crc32_sw(crc, data + aligned, remaining); \
- \
- return crc; \
- }
-
-DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le)
-DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be)
-DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le)
-
-
-static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm)
-{
- struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
-
- mctx->key = 0;
- return 0;
-}
-
-static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm)
-{
- struct crc_ctx *mctx = crypto_tfm_ctx(tfm);
-
- mctx->key = ~0;
- return 0;
-}
-
-static int crc32_vx_init(struct shash_desc *desc)
-{
- struct crc_ctx *mctx = crypto_shash_ctx(desc->tfm);
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- ctx->crc = mctx->key;
- return 0;
-}
-
-static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
- unsigned int newkeylen)
-{
- struct crc_ctx *mctx = crypto_shash_ctx(tfm);
-
- if (newkeylen != sizeof(mctx->key))
- return -EINVAL;
- mctx->key = le32_to_cpu(*(__le32 *)newkey);
- return 0;
-}
-
-static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey,
- unsigned int newkeylen)
-{
- struct crc_ctx *mctx = crypto_shash_ctx(tfm);
-
- if (newkeylen != sizeof(mctx->key))
- return -EINVAL;
- mctx->key = be32_to_cpu(*(__be32 *)newkey);
- return 0;
-}
-
-static int crc32le_vx_final(struct shash_desc *desc, u8 *out)
-{
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- *(__le32 *)out = cpu_to_le32p(&ctx->crc);
- return 0;
-}
-
-static int crc32be_vx_final(struct shash_desc *desc, u8 *out)
-{
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- *(__be32 *)out = cpu_to_be32p(&ctx->crc);
- return 0;
-}
-
-static int crc32c_vx_final(struct shash_desc *desc, u8 *out)
-{
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc);
-
- /*
- * Perform a final XOR with 0xFFFFFFFF to be in sync
- * with the generic crc32c shash implementation.
- */
- *(__le32 *)out = ~cpu_to_le32p(&ctx->crc);
- return 0;
-}
-
-static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(__le32 *)out = cpu_to_le32(crc32_le_vx(*crc, data, len));
- return 0;
-}
-
-static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(__be32 *)out = cpu_to_be32(crc32_be_vx(*crc, data, len));
- return 0;
-}
-
-static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len,
- u8 *out)
-{
- /*
- * Perform a final XOR with 0xFFFFFFFF to be in sync
- * with the generic crc32c shash implementation.
- */
- *(__le32 *)out = ~cpu_to_le32(crc32c_le_vx(*crc, data, len));
- return 0;
-}
-
-
-#define CRC32_VX_FINUP(alg, func) \
- static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \
- unsigned int datalen, u8 *out) \
- { \
- return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \
- data, datalen, out); \
- }
-
-CRC32_VX_FINUP(crc32le, crc32_le_vx)
-CRC32_VX_FINUP(crc32be, crc32_be_vx)
-CRC32_VX_FINUP(crc32c, crc32c_le_vx)
-
-#define CRC32_VX_DIGEST(alg, func) \
- static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \
- unsigned int len, u8 *out) \
- { \
- return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \
- data, len, out); \
- }
-
-CRC32_VX_DIGEST(crc32le, crc32_le_vx)
-CRC32_VX_DIGEST(crc32be, crc32_be_vx)
-CRC32_VX_DIGEST(crc32c, crc32c_le_vx)
-
-#define CRC32_VX_UPDATE(alg, func) \
- static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \
- unsigned int datalen) \
- { \
- struct crc_desc_ctx *ctx = shash_desc_ctx(desc); \
- ctx->crc = func(ctx->crc, data, datalen); \
- return 0; \
- }
-
-CRC32_VX_UPDATE(crc32le, crc32_le_vx)
-CRC32_VX_UPDATE(crc32be, crc32_be_vx)
-CRC32_VX_UPDATE(crc32c, crc32c_le_vx)
-
-
-static struct shash_alg crc32_vx_algs[] = {
- /* CRC-32 LE */
- {
- .init = crc32_vx_init,
- .setkey = crc32_vx_setkey,
- .update = crc32le_vx_update,
- .final = crc32le_vx_final,
- .finup = crc32le_vx_finup,
- .digest = crc32le_vx_digest,
- .descsize = sizeof(struct crc_desc_ctx),
- .digestsize = CRC32_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32",
- .cra_driver_name = "crc32-vx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CRC32_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crc_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = crc32_vx_cra_init_zero,
- },
- },
- /* CRC-32 BE */
- {
- .init = crc32_vx_init,
- .setkey = crc32be_vx_setkey,
- .update = crc32be_vx_update,
- .final = crc32be_vx_final,
- .finup = crc32be_vx_finup,
- .digest = crc32be_vx_digest,
- .descsize = sizeof(struct crc_desc_ctx),
- .digestsize = CRC32_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32be",
- .cra_driver_name = "crc32be-vx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CRC32_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crc_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = crc32_vx_cra_init_zero,
- },
- },
- /* CRC-32C LE */
- {
- .init = crc32_vx_init,
- .setkey = crc32_vx_setkey,
- .update = crc32c_vx_update,
- .final = crc32c_vx_final,
- .finup = crc32c_vx_finup,
- .digest = crc32c_vx_digest,
- .descsize = sizeof(struct crc_desc_ctx),
- .digestsize = CRC32_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32c",
- .cra_driver_name = "crc32c-vx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CRC32_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crc_ctx),
- .cra_module = THIS_MODULE,
- .cra_init = crc32_vx_cra_init_invert,
- },
- },
-};
-
-
-static int __init crc_vx_mod_init(void)
-{
- return crypto_register_shashes(crc32_vx_algs,
- ARRAY_SIZE(crc32_vx_algs));
-}
-
-static void __exit crc_vx_mod_exit(void)
-{
- crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs));
-}
-
-module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init);
-module_exit(crc_vx_mod_exit);
-
-MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
-MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_CRYPTO("crc32");
-MODULE_ALIAS_CRYPTO("crc32-vx");
-MODULE_ALIAS_CRYPTO("crc32c");
-MODULE_ALIAS_CRYPTO("crc32c-vx");
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index f43f897d3fc0..14bbfe50033c 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -22,5 +22,8 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o
obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
+
+obj-$(CONFIG_CRC32_ARCH) += crc32-s390.o
+crc32-s390-y := crc32-glue.o crc32le-vx.o crc32be-vx.o
diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c
new file mode 100644
index 000000000000..137080e61f90
--- /dev/null
+++ b/arch/s390/lib/crc32-glue.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CRC-32 implemented with the z/Architecture Vector Extension Facility.
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ */
+#define KMSG_COMPONENT "crc32-vx"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/module.h>
+#include <linux/cpufeature.h>
+#include <linux/crc32.h>
+#include <asm/fpu.h>
+#include "crc32-vx.h"
+
+#define VX_MIN_LEN 64
+#define VX_ALIGNMENT 16L
+#define VX_ALIGN_MASK (VX_ALIGNMENT - 1)
+
+static DEFINE_STATIC_KEY_FALSE(have_vxrs);
+
+/*
+ * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension
+ *
+ * Creates a function to perform a particular CRC-32 computation. Depending
+ * on the message buffer, the hardware-accelerated or software implementation
+ * is used. Note that the message buffer is aligned to improve fetch
+ * operations of VECTOR LOAD MULTIPLE instructions.
+ */
+#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \
+ u32 ___fname(u32 crc, const u8 *data, size_t datalen) \
+ { \
+ unsigned long prealign, aligned, remaining; \
+ DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \
+ \
+ if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || \
+ !static_branch_likely(&have_vxrs)) \
+ return ___crc32_sw(crc, data, datalen); \
+ \
+ if ((unsigned long)data & VX_ALIGN_MASK) { \
+ prealign = VX_ALIGNMENT - \
+ ((unsigned long)data & VX_ALIGN_MASK); \
+ datalen -= prealign; \
+ crc = ___crc32_sw(crc, data, prealign); \
+ data = (void *)((unsigned long)data + prealign); \
+ } \
+ \
+ aligned = datalen & ~VX_ALIGN_MASK; \
+ remaining = datalen & VX_ALIGN_MASK; \
+ \
+ kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \
+ crc = ___crc32_vx(crc, data, aligned); \
+ kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \
+ \
+ if (remaining) \
+ crc = ___crc32_sw(crc, data + aligned, remaining); \
+ \
+ return crc; \
+ } \
+ EXPORT_SYMBOL(___fname);
+
+DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base)
+DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base)
+DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base)
+
+static int __init crc32_s390_init(void)
+{
+ if (cpu_have_feature(S390_CPU_FEATURE_VXRS))
+ static_branch_enable(&have_vxrs);
+ return 0;
+}
+arch_initcall(crc32_s390_init);
+
+static void __exit crc32_s390_exit(void)
+{
+}
+module_exit(crc32_s390_exit);
+
+u32 crc32_optimizations(void)
+{
+ if (static_key_enabled(&have_vxrs))
+ return CRC32_LE_OPTIMIZATION |
+ CRC32_BE_OPTIMIZATION |
+ CRC32C_OPTIMIZATION;
+ return 0;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_AUTHOR("Hendrik Brueckner <brueckner@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extension Facility");
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/crypto/crc32-vx.h b/arch/s390/lib/crc32-vx.h
similarity index 100%
rename from arch/s390/crypto/crc32-vx.h
rename to arch/s390/lib/crc32-vx.h
diff --git a/arch/s390/crypto/crc32be-vx.c b/arch/s390/lib/crc32be-vx.c
similarity index 100%
rename from arch/s390/crypto/crc32be-vx.c
rename to arch/s390/lib/crc32be-vx.c
diff --git a/arch/s390/crypto/crc32le-vx.c b/arch/s390/lib/crc32le-vx.c
similarity index 100%
rename from arch/s390/crypto/crc32le-vx.c
rename to arch/s390/lib/crc32le-vx.c
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH v3 10/18] sparc/crc32: expose CRC32 functions through lib
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (8 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 09/18] s390/crc32: " Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-03 22:31 ` [PATCH v3 11/18] x86/crc32: update prototype for crc_pcl() Eric Biggers
` (7 subsequent siblings)
17 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Move the sparc CRC32C assembly code into the lib directory and wire it
up to the library interface. This allows it to be used without going
through the crypto API. It remains usable via the crypto API too via
the shash algorithms that use the library interface. Thus all the
arch-specific "shash" code becomes unnecessary and is removed.
Note: to see the diff from arch/sparc/crypto/crc32c_glue.c to
arch/sparc/lib/crc32_glue.c, view this commit with 'git show -M10'.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/sparc/Kconfig | 1 +
arch/sparc/crypto/Kconfig | 10 --
arch/sparc/crypto/Makefile | 4 -
arch/sparc/crypto/crc32c_glue.c | 184 ------------------------
arch/sparc/lib/Makefile | 2 +
arch/sparc/lib/crc32_glue.c | 93 ++++++++++++
arch/sparc/{crypto => lib}/crc32c_asm.S | 2 +-
7 files changed, 97 insertions(+), 199 deletions(-)
delete mode 100644 arch/sparc/crypto/crc32c_glue.c
create mode 100644 arch/sparc/lib/crc32_glue.c
rename arch/sparc/{crypto => lib}/crc32c_asm.S (92%)
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index dcfdb7f1dae9..0f88123925a4 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -108,10 +108,11 @@ config SPARC64
select ARCH_HAS_GIGANTIC_PAGE
select HAVE_SOFTIRQ_ON_OWN_STACK
select HAVE_SETUP_PER_CPU_AREA
select NEED_PER_CPU_EMBED_FIRST_CHUNK
select NEED_PER_CPU_PAGE_FIRST_CHUNK
+ select ARCH_HAS_CRC32
config ARCH_PROC_KCORE_TEXT
def_bool y
config CPU_BIG_ENDIAN
diff --git a/arch/sparc/crypto/Kconfig b/arch/sparc/crypto/Kconfig
index cfe5102b1c68..e858597de89d 100644
--- a/arch/sparc/crypto/Kconfig
+++ b/arch/sparc/crypto/Kconfig
@@ -14,20 +14,10 @@ config CRYPTO_DES_SPARC64
Length-preserving ciphers: DES with ECB and CBC modes
Length-preserving ciphers: Tripe DES EDE with ECB and CBC modes
Architecture: sparc64
-config CRYPTO_CRC32C_SPARC64
- tristate "CRC32c"
- depends on SPARC64
- select CRYPTO_HASH
- select CRC32
- help
- CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
-
- Architecture: sparc64
-
config CRYPTO_MD5_SPARC64
tristate "Digests: MD5"
depends on SPARC64
select CRYPTO_MD5
select CRYPTO_HASH
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
index d257186c27d1..a2d7fca40cb4 100644
--- a/arch/sparc/crypto/Makefile
+++ b/arch/sparc/crypto/Makefile
@@ -10,17 +10,13 @@ obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o
obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o
obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o
-obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o
-
sha1-sparc64-y := sha1_asm.o sha1_glue.o
sha256-sparc64-y := sha256_asm.o sha256_glue.o
sha512-sparc64-y := sha512_asm.o sha512_glue.o
md5-sparc64-y := md5_asm.o md5_glue.o
aes-sparc64-y := aes_asm.o aes_glue.o
des-sparc64-y := des_asm.o des_glue.o
camellia-sparc64-y := camellia_asm.o camellia_glue.o
-
-crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c
deleted file mode 100644
index 913b9a09e885..000000000000
--- a/arch/sparc/crypto/crc32c_glue.c
+++ /dev/null
@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Glue code for CRC32C optimized for sparc64 crypto opcodes.
- *
- * This is based largely upon arch/x86/crypto/crc32c-intel.c
- *
- * Copyright (C) 2008 Intel Corporation
- * Authors: Austin Zhang <austin_zhang@linux.intel.com>
- * Kent Liu <kent.liu@intel.com>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/crc32.h>
-
-#include <crypto/internal/hash.h>
-
-#include <asm/pstate.h>
-#include <asm/elf.h>
-#include <linux/unaligned.h>
-
-#include "opcodes.h"
-
-/*
- * Setting the seed allows arbitrary accumulators and flexible XOR policy
- * If your algorithm starts with ~0, then XOR with ~0 before you set
- * the seed.
- */
-static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key,
- unsigned int keylen)
-{
- u32 *mctx = crypto_shash_ctx(hash);
-
- if (keylen != sizeof(u32))
- return -EINVAL;
- *mctx = get_unaligned_le32(key);
- return 0;
-}
-
-static int crc32c_sparc64_init(struct shash_desc *desc)
-{
- u32 *mctx = crypto_shash_ctx(desc->tfm);
- u32 *crcp = shash_desc_ctx(desc);
-
- *crcp = *mctx;
-
- return 0;
-}
-
-extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len);
-
-static u32 crc32c_compute(u32 crc, const u8 *data, unsigned int len)
-{
- unsigned int n = -(uintptr_t)data & 7;
-
- if (n) {
- /* Data isn't 8-byte aligned. Align it. */
- n = min(n, len);
- crc = __crc32c_le(crc, data, n);
- data += n;
- len -= n;
- }
- n = len & ~7U;
- if (n) {
- crc32c_sparc64(&crc, (const u64 *)data, n);
- data += n;
- len -= n;
- }
- if (len)
- crc = __crc32c_le(crc, data, len);
- return crc;
-}
-
-static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- u32 *crcp = shash_desc_ctx(desc);
-
- *crcp = crc32c_compute(*crcp, data, len);
- return 0;
-}
-
-static int __crc32c_sparc64_finup(const u32 *crcp, const u8 *data,
- unsigned int len, u8 *out)
-{
- put_unaligned_le32(~crc32c_compute(*crcp, data, len), out);
- return 0;
-}
-
-static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out)
-{
- u32 *crcp = shash_desc_ctx(desc);
-
- put_unaligned_le32(~*crcp, out);
- return 0;
-}
-
-static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len,
- out);
-}
-
-static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = ~0;
-
- return 0;
-}
-
-#define CHKSUM_BLOCK_SIZE 1
-#define CHKSUM_DIGEST_SIZE 4
-
-static struct shash_alg alg = {
- .setkey = crc32c_sparc64_setkey,
- .init = crc32c_sparc64_init,
- .update = crc32c_sparc64_update,
- .final = crc32c_sparc64_final,
- .finup = crc32c_sparc64_finup,
- .digest = crc32c_sparc64_digest,
- .descsize = sizeof(u32),
- .digestsize = CHKSUM_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32c",
- .cra_driver_name = "crc32c-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_ctxsize = sizeof(u32),
- .cra_module = THIS_MODULE,
- .cra_init = crc32c_sparc64_cra_init,
- }
-};
-
-static bool __init sparc64_has_crc32c_opcode(void)
-{
- unsigned long cfr;
-
- if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
- return false;
-
- __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
- if (!(cfr & CFR_CRC32C))
- return false;
-
- return true;
-}
-
-static int __init crc32c_sparc64_mod_init(void)
-{
- if (sparc64_has_crc32c_opcode()) {
- pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n");
- return crypto_register_shash(&alg);
- }
- pr_info("sparc64 crc32c opcode not available.\n");
- return -ENODEV;
-}
-
-static void __exit crc32c_sparc64_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(crc32c_sparc64_mod_init);
-module_exit(crc32c_sparc64_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated");
-
-MODULE_ALIAS_CRYPTO("crc32c");
-
-#include "crop_devid.c"
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index ee5091dd67ed..5724d0f356eb 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -51,5 +51,7 @@ lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
obj-$(CONFIG_SPARC64) += iomap.o
obj-$(CONFIG_SPARC32) += atomic32.o
obj-$(CONFIG_SPARC64) += PeeCeeI.o
+obj-$(CONFIG_CRC32_ARCH) += crc32-sparc.o
+crc32-sparc-y := crc32_glue.o crc32c_asm.o
diff --git a/arch/sparc/lib/crc32_glue.c b/arch/sparc/lib/crc32_glue.c
new file mode 100644
index 000000000000..41076d2b1fd2
--- /dev/null
+++ b/arch/sparc/lib/crc32_glue.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Glue code for CRC32C optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon arch/x86/crypto/crc32c-intel.c
+ *
+ * Copyright (C) 2008 Intel Corporation
+ * Authors: Austin Zhang <austin_zhang@linux.intel.com>
+ * Kent Liu <kent.liu@intel.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/crc32.h>
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+static DEFINE_STATIC_KEY_FALSE(have_crc32c_opcode);
+
+u32 crc32_le_arch(u32 crc, const u8 *data, size_t len)
+{
+ return crc32_le_base(crc, data, len);
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len);
+
+u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len)
+{
+ size_t n = -(uintptr_t)data & 7;
+
+ if (!static_branch_likely(&have_crc32c_opcode))
+ return crc32c_le_base(crc, data, len);
+
+ if (n) {
+ /* Data isn't 8-byte aligned. Align it. */
+ n = min(n, len);
+ crc = crc32c_le_base(crc, data, n);
+ data += n;
+ len -= n;
+ }
+ n = len & ~7U;
+ if (n) {
+ crc32c_sparc64(&crc, (const u64 *)data, n);
+ data += n;
+ len -= n;
+ }
+ if (len)
+ crc = crc32c_le_base(crc, data, len);
+ return crc;
+}
+EXPORT_SYMBOL(crc32c_le_arch);
+
+u32 crc32_be_arch(u32 crc, const u8 *data, size_t len)
+{
+ return crc32_be_base(crc, data, len);
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+static int __init crc32_sparc_init(void)
+{
+ unsigned long cfr;
+
+ if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+ return 0;
+
+ __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+ if (!(cfr & CFR_CRC32C))
+ return 0;
+
+ static_branch_enable(&have_crc32c_opcode);
+ pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n");
+ return 0;
+}
+arch_initcall(crc32_sparc_init);
+
+static void __exit crc32_sparc_exit(void)
+{
+}
+module_exit(crc32_sparc_exit);
+
+u32 crc32_optimizations(void)
+{
+ if (static_key_enabled(&have_crc32c_opcode))
+ return CRC32C_OPTIMIZATION;
+ return 0;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated");
diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/lib/crc32c_asm.S
similarity index 92%
rename from arch/sparc/crypto/crc32c_asm.S
rename to arch/sparc/lib/crc32c_asm.S
index b8659a479242..ee454fa6aed6 100644
--- a/arch/sparc/crypto/crc32c_asm.S
+++ b/arch/sparc/lib/crc32c_asm.S
@@ -1,11 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
-#include "opcodes.h"
+#include "../crypto/opcodes.h"
ENTRY(crc32c_sparc64)
/* %o0=crc32p, %o1=data_ptr, %o2=len */
VISEntryHalf
lda [%o0] ASI_PL, %f1
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH v3 11/18] x86/crc32: update prototype for crc_pcl()
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (9 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 10/18] sparc/crc32: " Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-03 22:31 ` [PATCH v3 12/18] x86/crc32: update prototype for crc32_pclmul_le_16() Eric Biggers
` (6 subsequent siblings)
17 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
- Change the len parameter from unsigned int to size_t, so that the
library function which takes a size_t can safely use this code.
- Rename to crc32c_x86_3way() which is much clearer.
- Move the crc parameter to the front, as this is the usual convention.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/crc32c-intel_glue.c | 7 ++-
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 63 ++++++++++++-----------
2 files changed, 35 insertions(+), 35 deletions(-)
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 52c5d47ef5a1..603d159de400 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -39,12 +39,11 @@
* size is >= 512 to account
* for fpu state save/restore overhead.
*/
#define CRC32C_PCL_BREAKEVEN 512
-asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len,
- unsigned int crc_init);
+asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
#endif /* CONFIG_X86_64 */
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
while (length--) {
@@ -157,11 +156,11 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
* use faster PCL version if datasize is large enough to
* overcome kernel fpu state save/restore overhead
*/
if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
kernel_fpu_begin();
- *crcp = crc_pcl(data, len, *crcp);
+ *crcp = crc32c_x86_3way(*crcp, data, len);
kernel_fpu_end();
} else
*crcp = crc32c_intel_le_hw(*crcp, data, len);
return 0;
}
@@ -169,11 +168,11 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
kernel_fpu_begin();
- *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
+ *(__le32 *)out = ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len));
kernel_fpu_end();
} else
*(__le32 *)out =
~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
return 0;
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 752812bc4991..9b8770503bbc 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -50,19 +50,20 @@
# Define threshold below which buffers are considered "small" and routed to
# regular CRC code that does not interleave the CRC instructions.
#define SMALL_SIZE 200
-# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init);
+# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
.text
-SYM_FUNC_START(crc_pcl)
-#define bufp %rdi
-#define bufp_d %edi
-#define len %esi
-#define crc_init %edx
-#define crc_init_q %rdx
+SYM_FUNC_START(crc32c_x86_3way)
+#define crc0 %edi
+#define crc0_q %rdi
+#define bufp %rsi
+#define bufp_d %esi
+#define len %rdx
+#define len_dw %edx
#define n_misaligned %ecx /* overlaps chunk_bytes! */
#define n_misaligned_q %rcx
#define chunk_bytes %ecx /* overlaps n_misaligned! */
#define chunk_bytes_q %rcx
#define crc1 %r8
@@ -83,13 +84,13 @@ SYM_FUNC_START(crc_pcl)
# Process 1 <= n_misaligned <= 7 bytes individually in order to align
# the remaining data to an 8-byte boundary.
.Ldo_align:
movq (bufp), %rax
add n_misaligned_q, bufp
- sub n_misaligned, len
+ sub n_misaligned_q, len
.Lalign_loop:
- crc32b %al, crc_init # compute crc32 of 1-byte
+ crc32b %al, crc0 # compute crc32 of 1-byte
shr $8, %rax # get next byte
dec n_misaligned
jne .Lalign_loop
.Laligned:
@@ -100,11 +101,11 @@ SYM_FUNC_START(crc_pcl)
cmp $128*24, len
jae .Lfull_block
.Lpartial_block:
# Compute floor(len / 24) to get num qwords to process from each lane.
- imul $2731, len, %eax # 2731 = ceil(2^16 / 24)
+ imul $2731, len_dw, %eax # 2731 = ceil(2^16 / 24)
shr $16, %eax
jmp .Lcrc_3lanes
.Lfull_block:
# Processing 128 qwords from each lane.
@@ -123,20 +124,20 @@ SYM_FUNC_START(crc_pcl)
jl .Lcrc_3lanes_4x_done
# Unroll the loop by a factor of 4 to reduce the overhead of the loop
# bookkeeping instructions, which can compete with crc32q for the ALUs.
.Lcrc_3lanes_4x_loop:
- crc32q (bufp), crc_init_q
+ crc32q (bufp), crc0_q
crc32q (bufp,chunk_bytes_q), crc1
crc32q (bufp,chunk_bytes_q,2), crc2
- crc32q 8(bufp), crc_init_q
+ crc32q 8(bufp), crc0_q
crc32q 8(bufp,chunk_bytes_q), crc1
crc32q 8(bufp,chunk_bytes_q,2), crc2
- crc32q 16(bufp), crc_init_q
+ crc32q 16(bufp), crc0_q
crc32q 16(bufp,chunk_bytes_q), crc1
crc32q 16(bufp,chunk_bytes_q,2), crc2
- crc32q 24(bufp), crc_init_q
+ crc32q 24(bufp), crc0_q
crc32q 24(bufp,chunk_bytes_q), crc1
crc32q 24(bufp,chunk_bytes_q,2), crc2
add $32, bufp
sub $4, %eax
jge .Lcrc_3lanes_4x_loop
@@ -144,42 +145,42 @@ SYM_FUNC_START(crc_pcl)
.Lcrc_3lanes_4x_done:
add $4, %eax
jz .Lcrc_3lanes_last_qword
.Lcrc_3lanes_1x_loop:
- crc32q (bufp), crc_init_q
+ crc32q (bufp), crc0_q
crc32q (bufp,chunk_bytes_q), crc1
crc32q (bufp,chunk_bytes_q,2), crc2
add $8, bufp
dec %eax
jnz .Lcrc_3lanes_1x_loop
.Lcrc_3lanes_last_qword:
- crc32q (bufp), crc_init_q
+ crc32q (bufp), crc0_q
crc32q (bufp,chunk_bytes_q), crc1
# SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet
################################################################
## 4) Combine three results:
################################################################
lea (K_table-8)(%rip), %rax # first entry is for idx 1
pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2
lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
- sub %eax, len # len -= chunk_bytes * 3
+ sub %rax, len # len -= chunk_bytes * 3
- movq crc_init_q, %xmm1 # CRC for block 1
+ movq crc0_q, %xmm1 # CRC for block 1
pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
movq crc1, %xmm2 # CRC for block 2
pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1
pxor %xmm2,%xmm1
movq %xmm1, %rax
xor (bufp,chunk_bytes_q,2), %rax
- mov crc2, crc_init_q
- crc32 %rax, crc_init_q
+ mov crc2, crc0_q
+ crc32 %rax, crc0_q
lea 8(bufp,chunk_bytes_q,2), bufp
################################################################
## 5) If more blocks remain, goto (2):
################################################################
@@ -191,38 +192,38 @@ SYM_FUNC_START(crc_pcl)
#######################################################################
## 6) Process any remainder without interleaving:
#######################################################################
.Lsmall:
- test len, len
+ test len_dw, len_dw
jz .Ldone
- mov len, %eax
+ mov len_dw, %eax
shr $3, %eax
jz .Ldo_dword
.Ldo_qwords:
- crc32q (bufp), crc_init_q
+ crc32q (bufp), crc0_q
add $8, bufp
dec %eax
jnz .Ldo_qwords
.Ldo_dword:
- test $4, len
+ test $4, len_dw
jz .Ldo_word
- crc32l (bufp), crc_init
+ crc32l (bufp), crc0
add $4, bufp
.Ldo_word:
- test $2, len
+ test $2, len_dw
jz .Ldo_byte
- crc32w (bufp), crc_init
+ crc32w (bufp), crc0
add $2, bufp
.Ldo_byte:
- test $1, len
+ test $1, len_dw
jz .Ldone
- crc32b (bufp), crc_init
+ crc32b (bufp), crc0
.Ldone:
- mov crc_init, %eax
+ mov crc0, %eax
RET
-SYM_FUNC_END(crc_pcl)
+SYM_FUNC_END(crc32c_x86_3way)
.section .rodata, "a", @progbits
################################################################
## PCLMULQDQ tables
## Table is 128 entries x 2 words (8 bytes) each
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH v3 12/18] x86/crc32: update prototype for crc32_pclmul_le_16()
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (10 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 11/18] x86/crc32: update prototype for crc_pcl() Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-03 22:31 ` [PATCH v3 13/18] x86/crc32: expose CRC32 functions through lib Eric Biggers
` (5 subsequent siblings)
17 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
- Change the len parameter from unsigned int to size_t, so that the
library function which takes a size_t can safely use this code.
- Move the crc parameter to the front, as this is the usual convention.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/crc32-pclmul_asm.S | 19 +++++++++----------
arch/x86/crypto/crc32-pclmul_glue.c | 4 ++--
2 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
index 5d31137e2c7d..f9637789cac1 100644
--- a/arch/x86/crypto/crc32-pclmul_asm.S
+++ b/arch/x86/crypto/crc32-pclmul_asm.S
@@ -56,30 +56,29 @@
.octa 0x00000001F701164100000001DB710641
#define CONSTANT %xmm0
#ifdef __x86_64__
-#define BUF %rdi
-#define LEN %rsi
-#define CRC %edx
+#define CRC %edi
+#define BUF %rsi
+#define LEN %rdx
#else
-#define BUF %eax
-#define LEN %edx
-#define CRC %ecx
+#define CRC %eax
+#define BUF %edx
+#define LEN %ecx
#endif
.text
/**
* Calculate crc32
- * BUF - buffer (16 bytes aligned)
- * LEN - sizeof buffer (16 bytes aligned), LEN should be grater than 63
* CRC - initial crc32
+ * BUF - buffer (16 bytes aligned)
+ * LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
* return %eax crc32
- * uint crc32_pclmul_le_16(unsigned char const *buffer,
- * size_t len, uint crc32)
+ * u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
*/
SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
movdqa (BUF), %xmm1
movdqa 0x10(BUF), %xmm2
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index 9f5e342b9845..9d14eac51c5b 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -44,11 +44,11 @@
#define PCLMUL_MIN_LEN 64L /* minimum size of buffer
* for crc32_pclmul_le_16 */
#define SCALE_F 16L /* size of xmm register */
#define SCALE_F_MASK (SCALE_F - 1)
-u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32);
+u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
static u32 __attribute__((pure))
crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
{
unsigned int iquotient;
@@ -69,11 +69,11 @@ static u32 __attribute__((pure))
}
iquotient = len & (~SCALE_F_MASK);
iremainder = len & SCALE_F_MASK;
kernel_fpu_begin();
- crc = crc32_pclmul_le_16(p, iquotient, crc);
+ crc = crc32_pclmul_le_16(crc, p, iquotient);
kernel_fpu_end();
if (iremainder)
crc = crc32_le(crc, p + iquotient, iremainder);
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH v3 13/18] x86/crc32: expose CRC32 functions through lib
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (11 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 12/18] x86/crc32: update prototype for crc32_pclmul_le_16() Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-03 22:31 ` [PATCH v3 14/18] lib/crc32: make crc32c() go directly to lib Eric Biggers
` (4 subsequent siblings)
17 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Move the x86 CRC32 assembly code into the lib directory and wire it up
to the library interface. This allows it to be used without going
through the crypto API. It remains usable via the crypto API too via
the shash algorithms that use the library interface. Thus all the
arch-specific "shash" code becomes unnecessary and is removed.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/Kconfig | 1 +
arch/x86/crypto/Kconfig | 22 --
arch/x86/crypto/Makefile | 7 -
arch/x86/crypto/crc32-pclmul_glue.c | 202 --------------
arch/x86/crypto/crc32c-intel_glue.c | 249 ------------------
arch/x86/lib/Makefile | 4 +
arch/x86/lib/crc32-glue.c | 124 +++++++++
.../crc32-pclmul_asm.S => lib/crc32-pclmul.S} | 0
.../crc32c-3way.S} | 0
drivers/target/iscsi/Kconfig | 1 -
10 files changed, 129 insertions(+), 481 deletions(-)
delete mode 100644 arch/x86/crypto/crc32-pclmul_glue.c
delete mode 100644 arch/x86/crypto/crc32c-intel_glue.c
create mode 100644 arch/x86/lib/crc32-glue.c
rename arch/x86/{crypto/crc32-pclmul_asm.S => lib/crc32-pclmul.S} (100%)
rename arch/x86/{crypto/crc32c-pcl-intel-asm_64.S => lib/crc32c-3way.S} (100%)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 16354dfa6d96..e7470de11cec 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -74,10 +74,11 @@ config X86
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CPU_PASID if IOMMU_SVA
+ select ARCH_HAS_CRC32
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig
index 7b1bebed879d..1ca53e847966 100644
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -490,32 +490,10 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL
GCM GHASH hash function (NIST SP800-38D)
Architecture: x86_64 using:
- CLMUL-NI (carry-less multiplication new instructions)
-config CRYPTO_CRC32C_INTEL
- tristate "CRC32c (SSE4.2/PCLMULQDQ)"
- depends on X86
- select CRYPTO_HASH
- help
- CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
-
- Architecture: x86 (32-bit and 64-bit) using:
- - SSE4.2 (Streaming SIMD Extensions 4.2) CRC32 instruction
- - PCLMULQDQ (carry-less multiplication)
-
-config CRYPTO_CRC32_PCLMUL
- tristate "CRC32 (PCLMULQDQ)"
- depends on X86
- select CRYPTO_HASH
- select CRC32
- help
- CRC32 CRC algorithm (IEEE 802.3)
-
- Architecture: x86 (32-bit and 64-bit) using:
- - PCLMULQDQ (carry-less multiplication)
-
config CRYPTO_CRCT10DIF_PCLMUL
tristate "CRCT10DIF (PCLMULQDQ)"
depends on X86 && 64BIT && CRC_T10DIF
select CRYPTO_HASH
help
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 53b4a277809e..030b925ca4e2 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -73,17 +73,10 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) += polyval-clmulni.o
polyval-clmulni-y := polyval-clmulni_asm.o polyval-clmulni_glue.o
-obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
-crc32c-intel-y := crc32c-intel_glue.o
-crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
-
-obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
-crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
-
obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
deleted file mode 100644
index 9d14eac51c5b..000000000000
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation.
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/crc32.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-
-#include <asm/cpufeatures.h>
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
-
-#define CHKSUM_BLOCK_SIZE 1
-#define CHKSUM_DIGEST_SIZE 4
-
-#define PCLMUL_MIN_LEN 64L /* minimum size of buffer
- * for crc32_pclmul_le_16 */
-#define SCALE_F 16L /* size of xmm register */
-#define SCALE_F_MASK (SCALE_F - 1)
-
-u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
-
-static u32 __attribute__((pure))
- crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
-{
- unsigned int iquotient;
- unsigned int iremainder;
- unsigned int prealign;
-
- if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable())
- return crc32_le(crc, p, len);
-
- if ((long)p & SCALE_F_MASK) {
- /* align p to 16 byte */
- prealign = SCALE_F - ((long)p & SCALE_F_MASK);
-
- crc = crc32_le(crc, p, prealign);
- len -= prealign;
- p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
- ~SCALE_F_MASK);
- }
- iquotient = len & (~SCALE_F_MASK);
- iremainder = len & SCALE_F_MASK;
-
- kernel_fpu_begin();
- crc = crc32_pclmul_le_16(crc, p, iquotient);
- kernel_fpu_end();
-
- if (iremainder)
- crc = crc32_le(crc, p + iquotient, iremainder);
-
- return crc;
-}
-
-static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = 0;
-
- return 0;
-}
-
-static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
- unsigned int keylen)
-{
- u32 *mctx = crypto_shash_ctx(hash);
-
- if (keylen != sizeof(u32))
- return -EINVAL;
- *mctx = le32_to_cpup((__le32 *)key);
- return 0;
-}
-
-static int crc32_pclmul_init(struct shash_desc *desc)
-{
- u32 *mctx = crypto_shash_ctx(desc->tfm);
- u32 *crcp = shash_desc_ctx(desc);
-
- *crcp = *mctx;
-
- return 0;
-}
-
-static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- u32 *crcp = shash_desc_ctx(desc);
-
- *crcp = crc32_pclmul_le(*crcp, data, len);
- return 0;
-}
-
-/* No final XOR 0xFFFFFFFF, like crc32_le */
-static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
- return 0;
-}
-
-static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
-{
- u32 *crcp = shash_desc_ctx(desc);
-
- *(__le32 *)out = cpu_to_le32p(crcp);
- return 0;
-}
-
-static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len,
- out);
-}
-
-static struct shash_alg alg = {
- .setkey = crc32_pclmul_setkey,
- .init = crc32_pclmul_init,
- .update = crc32_pclmul_update,
- .final = crc32_pclmul_final,
- .finup = crc32_pclmul_finup,
- .digest = crc32_pclmul_digest,
- .descsize = sizeof(u32),
- .digestsize = CHKSUM_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32",
- .cra_driver_name = "crc32-pclmul",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_ctxsize = sizeof(u32),
- .cra_module = THIS_MODULE,
- .cra_init = crc32_pclmul_cra_init,
- }
-};
-
-static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
- X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
-
-
-static int __init crc32_pclmul_mod_init(void)
-{
-
- if (!x86_match_cpu(crc32pclmul_cpu_id)) {
- pr_info("PCLMULQDQ-NI instructions are not detected.\n");
- return -ENODEV;
- }
- return crypto_register_shash(&alg);
-}
-
-static void __exit crc32_pclmul_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(crc32_pclmul_mod_init);
-module_exit(crc32_pclmul_mod_fini);
-
-MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
-MODULE_DESCRIPTION("CRC32 algorithm (IEEE 802.3) accelerated with PCLMULQDQ");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_CRYPTO("crc32");
-MODULE_ALIAS_CRYPTO("crc32-pclmul");
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
deleted file mode 100644
index 603d159de400..000000000000
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ /dev/null
@@ -1,249 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
- * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
- * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2A: Instruction Set Reference, A-M
- *
- * Copyright (C) 2008 Intel Corporation
- * Authors: Austin Zhang <austin_zhang@linux.intel.com>
- * Kent Liu <kent.liu@intel.com>
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-
-#include <asm/cpufeatures.h>
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
-
-#define CHKSUM_BLOCK_SIZE 1
-#define CHKSUM_DIGEST_SIZE 4
-
-#define SCALE_F sizeof(unsigned long)
-
-#ifdef CONFIG_X86_64
-#define CRC32_INST "crc32q %1, %q0"
-#else
-#define CRC32_INST "crc32l %1, %0"
-#endif
-
-#ifdef CONFIG_X86_64
-/*
- * use carryless multiply version of crc32c when buffer
- * size is >= 512 to account
- * for fpu state save/restore overhead.
- */
-#define CRC32C_PCL_BREAKEVEN 512
-
-asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
-#endif /* CONFIG_X86_64 */
-
-static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
-{
- while (length--) {
- asm("crc32b %1, %0"
- : "+r" (crc) : "rm" (*data));
- data++;
- }
-
- return crc;
-}
-
-static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
-{
- unsigned int iquotient = len / SCALE_F;
- unsigned int iremainder = len % SCALE_F;
- unsigned long *ptmp = (unsigned long *)p;
-
- while (iquotient--) {
- asm(CRC32_INST
- : "+r" (crc) : "rm" (*ptmp));
- ptmp++;
- }
-
- if (iremainder)
- crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
- iremainder);
-
- return crc;
-}
-
-/*
- * Setting the seed allows arbitrary accumulators and flexible XOR policy
- * If your algorithm starts with ~0, then XOR with ~0 before you set
- * the seed.
- */
-static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
- unsigned int keylen)
-{
- u32 *mctx = crypto_shash_ctx(hash);
-
- if (keylen != sizeof(u32))
- return -EINVAL;
- *mctx = le32_to_cpup((__le32 *)key);
- return 0;
-}
-
-static int crc32c_intel_init(struct shash_desc *desc)
-{
- u32 *mctx = crypto_shash_ctx(desc->tfm);
- u32 *crcp = shash_desc_ctx(desc);
-
- *crcp = *mctx;
-
- return 0;
-}
-
-static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- u32 *crcp = shash_desc_ctx(desc);
-
- *crcp = crc32c_intel_le_hw(*crcp, data, len);
- return 0;
-}
-
-static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
- u8 *out)
-{
- *(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
- return 0;
-}
-
-static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
-{
- u32 *crcp = shash_desc_ctx(desc);
-
- *(__le32 *)out = ~cpu_to_le32p(crcp);
- return 0;
-}
-
-static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
- out);
-}
-
-static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
-{
- u32 *key = crypto_tfm_ctx(tfm);
-
- *key = ~0;
-
- return 0;
-}
-
-#ifdef CONFIG_X86_64
-static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- u32 *crcp = shash_desc_ctx(desc);
-
- /*
- * use faster PCL version if datasize is large enough to
- * overcome kernel fpu state save/restore overhead
- */
- if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
- kernel_fpu_begin();
- *crcp = crc32c_x86_3way(*crcp, data, len);
- kernel_fpu_end();
- } else
- *crcp = crc32c_intel_le_hw(*crcp, data, len);
- return 0;
-}
-
-static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
- u8 *out)
-{
- if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
- kernel_fpu_begin();
- *(__le32 *)out = ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len));
- kernel_fpu_end();
- } else
- *(__le32 *)out =
- ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
- return 0;
-}
-
-static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
- out);
-}
-#endif /* CONFIG_X86_64 */
-
-static struct shash_alg alg = {
- .setkey = crc32c_intel_setkey,
- .init = crc32c_intel_init,
- .update = crc32c_intel_update,
- .final = crc32c_intel_final,
- .finup = crc32c_intel_finup,
- .digest = crc32c_intel_digest,
- .descsize = sizeof(u32),
- .digestsize = CHKSUM_DIGEST_SIZE,
- .base = {
- .cra_name = "crc32c",
- .cra_driver_name = "crc32c-intel",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .cra_blocksize = CHKSUM_BLOCK_SIZE,
- .cra_ctxsize = sizeof(u32),
- .cra_module = THIS_MODULE,
- .cra_init = crc32c_intel_cra_init,
- }
-};
-
-static const struct x86_cpu_id crc32c_cpu_id[] = {
- X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
- {}
-};
-MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
-
-static int __init crc32c_intel_mod_init(void)
-{
- if (!x86_match_cpu(crc32c_cpu_id))
- return -ENODEV;
-#ifdef CONFIG_X86_64
- if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
- alg.update = crc32c_pcl_intel_update;
- alg.finup = crc32c_pcl_intel_finup;
- alg.digest = crc32c_pcl_intel_digest;
- }
-#endif
- return crypto_register_shash(&alg);
-}
-
-static void __exit crc32c_intel_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(crc32c_intel_mod_init);
-module_exit(crc32c_intel_mod_fini);
-
-MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
-MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_CRYPTO("crc32c");
-MODULE_ALIAS_CRYPTO("crc32c-intel");
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 98583a9dbab3..17510da06c9f 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -36,10 +36,14 @@ lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_MITIGATION_RETPOLINE) += retpoline.o
+obj-$(CONFIG_CRC32_ARCH) += crc32-x86.o
+crc32-x86-y := crc32-glue.o crc32-pclmul.o
+crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o
+
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
obj-y += iomem.o
ifeq ($(CONFIG_X86_32),y)
obj-y += atomic64_32.o
diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c
new file mode 100644
index 000000000000..9fcc65db6cb5
--- /dev/null
+++ b/arch/x86/lib/crc32-glue.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * x86-optimized CRC32 functions
+ *
+ * Copyright (C) 2008 Intel Corporation
+ * Copyright 2012 Xyratex Technology Limited
+ * Copyright 2024 Google LLC
+ */
+
+#include <asm/cpufeatures.h>
+#include <asm/simd.h>
+#include <crypto/internal/simd.h>
+#include <linux/crc32.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+/* minimum size of buffer for crc32_pclmul_le_16 */
+#define CRC32_PCLMUL_MIN_LEN 64
+
+static DEFINE_STATIC_KEY_FALSE(have_crc32);
+static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
+
+u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
+
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ if (len >= CRC32_PCLMUL_MIN_LEN + 15 &&
+ crypto_simd_usable() && static_branch_likely(&have_pclmulqdq)) {
+ size_t n = -(uintptr_t)p & 15;
+
+ /* align p to 16-byte boundary */
+ if (n) {
+ crc = crc32_le_base(crc, p, n);
+ p += n;
+ len -= n;
+ }
+ n = round_down(len, 16);
+ kernel_fpu_begin();
+ crc = crc32_pclmul_le_16(crc, p, n);
+ kernel_fpu_end();
+ p += n;
+ len -= n;
+ }
+ if (len)
+ crc = crc32_le_base(crc, p, len);
+ return crc;
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+#ifdef CONFIG_X86_64
+#define CRC32_INST "crc32q %1, %q0"
+#else
+#define CRC32_INST "crc32l %1, %0"
+#endif
+
+/*
+ * Use carryless multiply version of crc32c when buffer size is >= 512 to
+ * account for FPU state save/restore overhead.
+ */
+#define CRC32C_PCLMUL_BREAKEVEN 512
+
+asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
+
+u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ size_t num_longs;
+
+ if (!static_branch_likely(&have_crc32))
+ return crc32c_le_base(crc, p, len);
+
+ if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
+ crypto_simd_usable() && static_branch_likely(&have_pclmulqdq)) {
+ kernel_fpu_begin();
+ crc = crc32c_x86_3way(crc, p, len);
+ kernel_fpu_end();
+ return crc;
+ }
+
+ for (num_longs = len / sizeof(unsigned long);
+ num_longs != 0; num_longs--, p += sizeof(unsigned long))
+ asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));
+
+ for (len %= sizeof(unsigned long); len; len--, p++)
+ asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));
+
+ return crc;
+}
+EXPORT_SYMBOL(crc32c_le_arch);
+
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+ return crc32_be_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+static int __init crc32_x86_init(void)
+{
+ if (boot_cpu_has(X86_FEATURE_XMM4_2))
+ static_branch_enable(&have_crc32);
+ if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
+ static_branch_enable(&have_pclmulqdq);
+ return 0;
+}
+arch_initcall(crc32_x86_init);
+
+static void __exit crc32_x86_exit(void)
+{
+}
+module_exit(crc32_x86_exit);
+
+u32 crc32_optimizations(void)
+{
+ u32 optimizations = 0;
+
+ if (static_key_enabled(&have_crc32))
+ optimizations |= CRC32C_OPTIMIZATION;
+ if (static_key_enabled(&have_pclmulqdq))
+ optimizations |= CRC32_LE_OPTIMIZATION;
+ return optimizations;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_DESCRIPTION("x86-optimized CRC32 functions");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/lib/crc32-pclmul.S
similarity index 100%
rename from arch/x86/crypto/crc32-pclmul_asm.S
rename to arch/x86/lib/crc32-pclmul.S
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/lib/crc32c-3way.S
similarity index 100%
rename from arch/x86/crypto/crc32c-pcl-intel-asm_64.S
rename to arch/x86/lib/crc32c-3way.S
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
index 922b207bc69d..1c0517a12571 100644
--- a/drivers/target/iscsi/Kconfig
+++ b/drivers/target/iscsi/Kconfig
@@ -2,11 +2,10 @@
config ISCSI_TARGET
tristate "SCSI Target Mode Stack"
depends on INET
select CRYPTO
select CRYPTO_CRC32C
- select CRYPTO_CRC32C_INTEL if X86
help
Say M to enable the SCSI target mode stack. A SCSI target mode stack
is software that makes local storage available over a storage network
to a SCSI initiator system. The supported storage network technologies
include iSCSI, Fibre Channel and the SCSI RDMA Protocol (SRP).
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH v3 14/18] lib/crc32: make crc32c() go directly to lib
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (12 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 13/18] x86/crc32: expose CRC32 functions through lib Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-03 22:31 ` [PATCH v3 15/18] ext4: switch to using the crc32c library Eric Biggers
` (3 subsequent siblings)
17 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Now that the lower level __crc32c_le() library function is optimized for
each architecture, make crc32c() just call that instead of taking an
inefficient and error-prone detour through the shash API.
Note: a future cleanup should make crc32c_le() be the actual library
function instead of __crc32c_le(). That will require updating callers
of __crc32c_le() to use crc32c_le() instead, and updating callers of
crc32c_le() that expect a 'const void *' arg to expect 'const u8 *'
instead. Similarly, a future cleanup should remove LIBCRC32C by making
everyone who is selecting it just select CRC32 directly instead.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
include/linux/crc32c.h | 7 ++--
lib/Kconfig | 10 ++----
lib/Makefile | 1 -
lib/libcrc32c.c | 74 ------------------------------------------
4 files changed, 8 insertions(+), 84 deletions(-)
delete mode 100644 lib/libcrc32c.c
diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h
index 357ae4611a45..47eb78003c26 100644
--- a/include/linux/crc32c.h
+++ b/include/linux/crc32c.h
@@ -1,12 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_CRC32C_H
#define _LINUX_CRC32C_H
-#include <linux/types.h>
+#include <linux/crc32.h>
-extern u32 crc32c(u32 crc, const void *address, unsigned int length);
+static inline u32 crc32c(u32 crc, const void *address, unsigned int length)
+{
+ return __crc32c_le(crc, address, length);
+}
/* This macro exists for backwards-compatibility. */
#define crc32c_le crc32c
#endif /* _LINUX_CRC32C_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 07afcf214f35..b894ee64ff95 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -296,18 +296,14 @@ config CRC7
the kernel tree does. Such modules that use library CRC7
functions require M here.
config LIBCRC32C
tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check"
- select CRYPTO
- select CRYPTO_CRC32C
+ select CRC32
help
- This option is provided for the case where no in-kernel-tree
- modules require CRC32c functions, but a module built outside the
- kernel tree does. Such modules that use library CRC32c functions
- require M here. See Castagnoli93.
- Module will be libcrc32c.
+ This option just selects CRC32 and is provided for compatibility
+ purposes until the users are updated to select CRC32 directly.
config CRC8
tristate "CRC8 function"
help
This option provides CRC8 function. Drivers may select this
diff --git a/lib/Makefile b/lib/Makefile
index 773adf88af41..15646679aee2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -161,11 +161,10 @@ obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o
obj-$(CONFIG_CRC32) += crc32.o
obj-$(CONFIG_CRC64) += crc64.o
obj-$(CONFIG_CRC32_SELFTEST) += crc32test.o
obj-$(CONFIG_CRC4) += crc4.o
obj-$(CONFIG_CRC7) += crc7.o
-obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
obj-$(CONFIG_CRC8) += crc8.o
obj-$(CONFIG_CRC64_ROCKSOFT) += crc64-rocksoft.o
obj-$(CONFIG_XXHASH) += xxhash.o
obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
deleted file mode 100644
index 649e687413a0..000000000000
--- a/lib/libcrc32c.c
+++ /dev/null
@@ -1,74 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * CRC32C
- *@Article{castagnoli-crc,
- * author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman},
- * title = {{Optimization of Cyclic Redundancy-Check Codes with 24
- * and 32 Parity Bits}},
- * journal = IEEE Transactions on Communication,
- * year = {1993},
- * volume = {41},
- * number = {6},
- * pages = {},
- * month = {June},
- *}
- * Used by the iSCSI driver, possibly others, and derived from
- * the iscsi-crc.c module of the linux-iscsi driver at
- * http://linux-iscsi.sourceforge.net.
- *
- * Following the example of lib/crc32, this function is intended to be
- * flexible and useful for all users. Modules that currently have their
- * own crc32c, but hopefully may be able to use this one are:
- * net/sctp (please add all your doco to here if you change to
- * use this one!)
- * <endoflist>
- *
- * Copyright (c) 2004 Cisco Systems, Inc.
- */
-
-#include <crypto/hash.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/crc32c.h>
-
-static struct crypto_shash *tfm;
-
-u32 crc32c(u32 crc, const void *address, unsigned int length)
-{
- SHASH_DESC_ON_STACK(shash, tfm);
- u32 ret, *ctx = (u32 *)shash_desc_ctx(shash);
- int err;
-
- shash->tfm = tfm;
- *ctx = crc;
-
- err = crypto_shash_update(shash, address, length);
- BUG_ON(err);
-
- ret = *ctx;
- barrier_data(ctx);
- return ret;
-}
-
-EXPORT_SYMBOL(crc32c);
-
-static int __init libcrc32c_mod_init(void)
-{
- tfm = crypto_alloc_shash("crc32c", 0, 0);
- return PTR_ERR_OR_ZERO(tfm);
-}
-
-static void __exit libcrc32c_mod_fini(void)
-{
- crypto_free_shash(tfm);
-}
-
-module_init(libcrc32c_mod_init);
-module_exit(libcrc32c_mod_fini);
-
-MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>");
-MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations");
-MODULE_LICENSE("GPL");
-MODULE_SOFTDEP("pre: crc32c");
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* [PATCH v3 15/18] ext4: switch to using the crc32c library
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (13 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 14/18] lib/crc32: make crc32c() go directly to lib Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-04 15:59 ` Darrick J. Wong
2024-11-04 16:17 ` Theodore Ts'o
2024-11-03 22:31 ` [PATCH v3 16/18] jbd2: " Eric Biggers
` (2 subsequent siblings)
17 siblings, 2 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Now that the crc32c() library function directly takes advantage of
architecture-specific optimizations, it is unnecessary to go through the
crypto API. Just use crc32c(). This is much simpler, and it improves
performance due to eliminating the crypto API overhead.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
fs/ext4/Kconfig | 3 +--
fs/ext4/ext4.h | 25 +++----------------------
fs/ext4/super.c | 15 ---------------
3 files changed, 4 insertions(+), 39 deletions(-)
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e20d59221fc0..c9ca41d91a6c 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -29,12 +29,11 @@ config EXT3_FS_SECURITY
config EXT4_FS
tristate "The Extended 4 (ext4) filesystem"
select BUFFER_HEAD
select JBD2
select CRC16
- select CRYPTO
- select CRYPTO_CRC32C
+ select CRC32
select FS_IOMAP
select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
help
This is the next generation of the ext3 filesystem.
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 44b0d418143c..99aa512a7de1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -31,11 +31,11 @@
#include <linux/wait.h>
#include <linux/sched/signal.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
#include <linux/ratelimit.h>
-#include <crypto/hash.h>
+#include <linux/crc32c.h>
#include <linux/falloc.h>
#include <linux/percpu-rwsem.h>
#include <linux/fiemap.h>
#ifdef __KERNEL__
#include <linux/compat.h>
@@ -1660,13 +1660,10 @@ struct ext4_sb_info {
struct task_struct *s_mmp_tsk;
/* record the last minlen when FITRIM is called. */
unsigned long s_last_trim_minblks;
- /* Reference to checksum algorithm driver via cryptoapi */
- struct crypto_shash *s_chksum_driver;
-
/* Precomputed FS UUID checksum for seeding other checksums */
__u32 s_csum_seed;
/* Reclaim extents from extent status tree */
struct shrinker *s_es_shrinker;
@@ -2465,23 +2462,11 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
#define DX_HASH_LAST DX_HASH_SIPHASH
static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
const void *address, unsigned int length)
{
- struct {
- struct shash_desc shash;
- char ctx[4];
- } desc;
-
- BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx));
-
- desc.shash.tfm = sbi->s_chksum_driver;
- *(u32 *)desc.ctx = crc;
-
- BUG_ON(crypto_shash_update(&desc.shash, address, length));
-
- return *(u32 *)desc.ctx;
+ return crc32c(crc, address, length);
}
#ifdef __KERNEL__
/* hash info structure used by the directory hash */
@@ -3278,15 +3263,11 @@ extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
extern int ext4_register_li_request(struct super_block *sb,
ext4_group_t first_not_zeroed);
static inline int ext4_has_metadata_csum(struct super_block *sb)
{
- WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) &&
- !EXT4_SB(sb)->s_chksum_driver);
-
- return ext4_has_feature_metadata_csum(sb) &&
- (EXT4_SB(sb)->s_chksum_driver != NULL);
+ return ext4_has_feature_metadata_csum(sb);
}
static inline int ext4_has_group_desc_csum(struct super_block *sb)
{
return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 16a4ce704460..1a821093cc0d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1371,12 +1371,10 @@ static void ext4_put_super(struct super_block *sb)
* Now that we are completely done shutting down the
* superblock, we need to actually destroy the kobject.
*/
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
- if (sbi->s_chksum_driver)
- crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->s_blockgroup_lock);
fs_put_dax(sbi->s_daxdev, NULL);
fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
@@ -4586,19 +4584,10 @@ static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_blo
return -EINVAL;
}
ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
ext4_orphan_file_block_trigger);
- /* Load the checksum driver */
- sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(sbi->s_chksum_driver)) {
- int ret = PTR_ERR(sbi->s_chksum_driver);
- ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
- sbi->s_chksum_driver = NULL;
- return ret;
- }
-
/* Check superblock checksum */
if (!ext4_superblock_csum_verify(sb, es)) {
ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
"invalid superblock checksum. Run e2fsck?");
return -EFSBADCRC;
@@ -5638,13 +5627,10 @@ failed_mount8: __maybe_unused
flush_work(&sbi->s_sb_upd_work);
ext4_stop_mmpd(sbi);
del_timer_sync(&sbi->s_err_report);
ext4_group_desc_free(sbi);
failed_mount:
- if (sbi->s_chksum_driver)
- crypto_free_shash(sbi->s_chksum_driver);
-
#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
#endif
#ifdef CONFIG_QUOTA
@@ -7433,8 +7419,7 @@ static void __exit ext4_exit_fs(void)
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
-MODULE_SOFTDEP("pre: crc32c");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH v3 15/18] ext4: switch to using the crc32c library
2024-11-03 22:31 ` [PATCH v3 15/18] ext4: switch to using the crc32c library Eric Biggers
@ 2024-11-04 15:59 ` Darrick J. Wong
2024-11-04 17:48 ` Eric Biggers
2024-11-04 16:17 ` Theodore Ts'o
1 sibling, 1 reply; 31+ messages in thread
From: Darrick J. Wong @ 2024-11-04 15:59 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-kernel, linux-arch, linux-arm-kernel, linux-crypto,
linux-ext4, linux-f2fs-devel, linux-mips, linux-riscv, linux-s390,
linux-scsi, linuxppc-dev, loongarch, sparclinux, x86,
Ard Biesheuvel
On Sun, Nov 03, 2024 at 02:31:51PM -0800, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
>
> Now that the crc32c() library function directly takes advantage of
> architecture-specific optimizations, it is unnecessary to go through the
> crypto API. Just use crc32c(). This is much simpler, and it improves
> performance due to eliminating the crypto API overhead.
>
> Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
> fs/ext4/Kconfig | 3 +--
> fs/ext4/ext4.h | 25 +++----------------------
> fs/ext4/super.c | 15 ---------------
> 3 files changed, 4 insertions(+), 39 deletions(-)
>
> diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
> index e20d59221fc0..c9ca41d91a6c 100644
> --- a/fs/ext4/Kconfig
> +++ b/fs/ext4/Kconfig
> @@ -29,12 +29,11 @@ config EXT3_FS_SECURITY
> config EXT4_FS
> tristate "The Extended 4 (ext4) filesystem"
> select BUFFER_HEAD
> select JBD2
> select CRC16
> - select CRYPTO
> - select CRYPTO_CRC32C
> + select CRC32
Hmm. Looking at your git branch (which was quite helpful to link to!) I
think for XFS we don't need to change the crc32c() calls, and the only
porting work that needs to be done is mirroring this Kconfig change?
And that doesn't even need to be done until someone wants to get rid of
CONFIG_LIBCRC32C, right?
> select FS_IOMAP
> select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
> help
> This is the next generation of the ext3 filesystem.
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 44b0d418143c..99aa512a7de1 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -31,11 +31,11 @@
> #include <linux/wait.h>
> #include <linux/sched/signal.h>
> #include <linux/blockgroup_lock.h>
> #include <linux/percpu_counter.h>
> #include <linux/ratelimit.h>
> -#include <crypto/hash.h>
> +#include <linux/crc32c.h>
> #include <linux/falloc.h>
> #include <linux/percpu-rwsem.h>
> #include <linux/fiemap.h>
> #ifdef __KERNEL__
> #include <linux/compat.h>
> @@ -1660,13 +1660,10 @@ struct ext4_sb_info {
> struct task_struct *s_mmp_tsk;
>
> /* record the last minlen when FITRIM is called. */
> unsigned long s_last_trim_minblks;
>
> - /* Reference to checksum algorithm driver via cryptoapi */
> - struct crypto_shash *s_chksum_driver;
> -
> /* Precomputed FS UUID checksum for seeding other checksums */
> __u32 s_csum_seed;
>
> /* Reclaim extents from extent status tree */
> struct shrinker *s_es_shrinker;
> @@ -2465,23 +2462,11 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
> #define DX_HASH_LAST DX_HASH_SIPHASH
>
> static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
> const void *address, unsigned int length)
> {
> - struct {
> - struct shash_desc shash;
> - char ctx[4];
> - } desc;
> -
> - BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=sizeof(desc.ctx));
> -
> - desc.shash.tfm = sbi->s_chksum_driver;
> - *(u32 *)desc.ctx = crc;
> -
> - BUG_ON(crypto_shash_update(&desc.shash, address, length));
> -
> - return *(u32 *)desc.ctx;
> + return crc32c(crc, address, length);
> }
>
> #ifdef __KERNEL__
>
> /* hash info structure used by the directory hash */
> @@ -3278,15 +3263,11 @@ extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
> extern int ext4_register_li_request(struct super_block *sb,
> ext4_group_t first_not_zeroed);
>
> static inline int ext4_has_metadata_csum(struct super_block *sb)
> {
> - WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) &&
> - !EXT4_SB(sb)->s_chksum_driver);
> -
> - return ext4_has_feature_metadata_csum(sb) &&
> - (EXT4_SB(sb)->s_chksum_driver != NULL);
> + return ext4_has_feature_metadata_csum(sb);
> }
Nit: Someone might want to
s/ext4_has_metadata_csum/ext4_has_feature_metadata_csum/ here to get rid
of the confusingly named trivial helper.
Otherwise this logic looks ok to me, so
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
--D
>
> static inline int ext4_has_group_desc_csum(struct super_block *sb)
> {
> return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb);
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 16a4ce704460..1a821093cc0d 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1371,12 +1371,10 @@ static void ext4_put_super(struct super_block *sb)
> * Now that we are completely done shutting down the
> * superblock, we need to actually destroy the kobject.
> */
> kobject_put(&sbi->s_kobj);
> wait_for_completion(&sbi->s_kobj_unregister);
> - if (sbi->s_chksum_driver)
> - crypto_free_shash(sbi->s_chksum_driver);
> kfree(sbi->s_blockgroup_lock);
> fs_put_dax(sbi->s_daxdev, NULL);
> fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
> #if IS_ENABLED(CONFIG_UNICODE)
> utf8_unload(sb->s_encoding);
> @@ -4586,19 +4584,10 @@ static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_blo
> return -EINVAL;
> }
> ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
> ext4_orphan_file_block_trigger);
>
> - /* Load the checksum driver */
> - sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
> - if (IS_ERR(sbi->s_chksum_driver)) {
> - int ret = PTR_ERR(sbi->s_chksum_driver);
> - ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
> - sbi->s_chksum_driver = NULL;
> - return ret;
> - }
> -
> /* Check superblock checksum */
> if (!ext4_superblock_csum_verify(sb, es)) {
> ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
> "invalid superblock checksum. Run e2fsck?");
> return -EFSBADCRC;
> @@ -5638,13 +5627,10 @@ failed_mount8: __maybe_unused
> flush_work(&sbi->s_sb_upd_work);
> ext4_stop_mmpd(sbi);
> del_timer_sync(&sbi->s_err_report);
> ext4_group_desc_free(sbi);
> failed_mount:
> - if (sbi->s_chksum_driver)
> - crypto_free_shash(sbi->s_chksum_driver);
> -
> #if IS_ENABLED(CONFIG_UNICODE)
> utf8_unload(sb->s_encoding);
> #endif
>
> #ifdef CONFIG_QUOTA
> @@ -7433,8 +7419,7 @@ static void __exit ext4_exit_fs(void)
> }
>
> MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
> MODULE_DESCRIPTION("Fourth Extended Filesystem");
> MODULE_LICENSE("GPL");
> -MODULE_SOFTDEP("pre: crc32c");
> module_init(ext4_init_fs)
> module_exit(ext4_exit_fs)
> --
> 2.47.0
>
>
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v3 15/18] ext4: switch to using the crc32c library
2024-11-04 15:59 ` Darrick J. Wong
@ 2024-11-04 17:48 ` Eric Biggers
0 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-04 17:48 UTC (permalink / raw)
To: Darrick J. Wong
Cc: linux-kernel, linux-arch, linux-arm-kernel, linux-crypto,
linux-ext4, linux-f2fs-devel, linux-mips, linux-riscv, linux-s390,
linux-scsi, linuxppc-dev, loongarch, sparclinux, x86,
Ard Biesheuvel
On Mon, Nov 04, 2024 at 07:59:00AM -0800, Darrick J. Wong wrote:
> Hmm. Looking at your git branch (which was quite helpful to link to!) I
> think for XFS we don't need to change the crc32c() calls, and the only
> porting work that needs to be done is mirroring this Kconfig change?
> And that doesn't even need to be done until someone wants to get rid of
> CONFIG_LIBCRC32C, right?
That's correct, no porting work is required now. 'select LIBCRC32C' should be
replaced with 'select CRC32', but that can be done later.
> > @@ -3278,15 +3263,11 @@ extern void ext4_group_desc_csum_set(struct super_block *sb, __u32 group,
> > extern int ext4_register_li_request(struct super_block *sb,
> > ext4_group_t first_not_zeroed);
> >
> > static inline int ext4_has_metadata_csum(struct super_block *sb)
> > {
> > - WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) &&
> > - !EXT4_SB(sb)->s_chksum_driver);
> > -
> > - return ext4_has_feature_metadata_csum(sb) &&
> > - (EXT4_SB(sb)->s_chksum_driver != NULL);
> > + return ext4_has_feature_metadata_csum(sb);
> > }
>
> Nit: Someone might want to
> s/ext4_has_metadata_csum/ext4_has_feature_metadata_csum/ here to get rid
> of the confusingly named trivial helper.
>
Yes, that should be done as a follow-up patch.
> Otherwise this logic looks ok to me, so
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
>
> --D
Thanks,
- Eric
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v3 15/18] ext4: switch to using the crc32c library
2024-11-03 22:31 ` [PATCH v3 15/18] ext4: switch to using the crc32c library Eric Biggers
2024-11-04 15:59 ` Darrick J. Wong
@ 2024-11-04 16:17 ` Theodore Ts'o
1 sibling, 0 replies; 31+ messages in thread
From: Theodore Ts'o @ 2024-11-04 16:17 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-kernel, linux-arch, linux-arm-kernel, linux-crypto,
linux-ext4, linux-f2fs-devel, linux-mips, linux-riscv, linux-s390,
linux-scsi, linuxppc-dev, loongarch, sparclinux, x86,
Ard Biesheuvel
On Sun, Nov 03, 2024 at 02:31:51PM -0800, Eric Biggers wrote:
> Now that the crc32c() library function directly takes advantage of
> architecture-specific optimizations, it is unnecessary to go through the
> crypto API. Just use crc32c(). This is much simpler, and it improves
> performance due to eliminating the crypto API overhead.
>
> Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Theodore Ts'o <tytso@mit.edu>
Thanks for the cleanup!
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v3 16/18] jbd2: switch to using the crc32c library
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (14 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 15/18] ext4: switch to using the crc32c library Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-04 16:01 ` Darrick J. Wong
2024-11-04 16:17 ` Theodore Ts'o
2024-11-03 22:31 ` [PATCH v3 17/18] f2fs: switch to using the crc32 library Eric Biggers
2024-11-03 22:31 ` [PATCH v3 18/18] scsi: target: iscsi: switch to using the crc32c library Eric Biggers
17 siblings, 2 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Now that the crc32c() library function directly takes advantage of
architecture-specific optimizations, it is unnecessary to go through the
crypto API. Just use crc32c(). This is much simpler, and it improves
performance due to eliminating the crypto API overhead.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
fs/jbd2/Kconfig | 2 --
fs/jbd2/journal.c | 25 ++-----------------------
include/linux/jbd2.h | 31 +++----------------------------
3 files changed, 5 insertions(+), 53 deletions(-)
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index 4ad2c67f93f1..9c19e1512101 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -1,11 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
config JBD2
tristate
select CRC32
- select CRYPTO
- select CRYPTO_CRC32C
help
This is a generic journaling layer for block devices that support
both 32-bit and 64-bit block numbers. It is currently used by
the ext4 and OCFS2 filesystems, but it could also be used to add
journal support to other file systems or block devices such
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 97f487c3d8fc..56cea5a738a7 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1373,24 +1373,16 @@ static int journal_check_superblock(journal_t *journal)
printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
"at the same time!\n");
return err;
}
- /* Load the checksum driver */
if (jbd2_journal_has_csum_v2or3_feature(journal)) {
if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) {
printk(KERN_ERR "JBD2: Unknown checksum type\n");
return err;
}
- journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
- err = PTR_ERR(journal->j_chksum_driver);
- journal->j_chksum_driver = NULL;
- return err;
- }
/* Check superblock checksum */
if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
printk(KERN_ERR "JBD2: journal checksum error\n");
err = -EFSBADCRC;
return err;
@@ -1611,12 +1603,10 @@ static journal_t *journal_init_common(struct block_device *bdev,
return journal;
err_cleanup:
percpu_counter_destroy(&journal->j_checkpoint_jh_count);
- if (journal->j_chksum_driver)
- crypto_free_shash(journal->j_chksum_driver);
kfree(journal->j_wbuf);
jbd2_journal_destroy_revoke(journal);
journal_fail_superblock(journal);
kfree(journal);
return ERR_PTR(err);
@@ -2194,12 +2184,10 @@ int jbd2_journal_destroy(journal_t *journal)
if (journal->j_proc_entry)
jbd2_stats_proc_exit(journal);
iput(journal->j_inode);
if (journal->j_revoke)
jbd2_journal_destroy_revoke(journal);
- if (journal->j_chksum_driver)
- crypto_free_shash(journal->j_chksum_driver);
kfree(journal->j_fc_wbuf);
kfree(journal->j_wbuf);
kfree(journal);
return err;
@@ -2340,23 +2328,14 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
pr_err("JBD2: Cannot enable fast commits.\n");
return 0;
}
}
- /* Load the checksum driver if necessary */
- if ((journal->j_chksum_driver == NULL) &&
- INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
- journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
- journal->j_chksum_driver = NULL;
- return 0;
- }
- /* Precompute checksum seed for all metadata */
+ /* Precompute checksum seed for all metadata */
+ if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3))
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
sizeof(sb->s_uuid));
- }
lock_buffer(journal->j_sb_buffer);
/* If enabling v3 checksums, update superblock */
if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 8aef9bb6ad57..33d25a3d15f1 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -26,11 +26,11 @@
#include <linux/mutex.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/bit_spinlock.h>
#include <linux/blkdev.h>
-#include <crypto/hash.h>
+#include <linux/crc32c.h>
#endif
#define journal_oom_retry 1
/*
@@ -1239,17 +1239,10 @@ struct journal_s
* An opaque pointer to fs-private information. ext3 puts its
* superblock pointer here.
*/
void *j_private;
- /**
- * @j_chksum_driver:
- *
- * Reference to checksum algorithm driver via cryptoapi.
- */
- struct crypto_shash *j_chksum_driver;
-
/**
* @j_csum_seed:
*
* Precomputed journal UUID checksum for seeding other checksums.
*/
@@ -1748,14 +1741,11 @@ static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j)
return jbd2_has_feature_csum2(j) || jbd2_has_feature_csum3(j);
}
static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
{
- WARN_ON_ONCE(jbd2_journal_has_csum_v2or3_feature(journal) &&
- journal->j_chksum_driver == NULL);
-
- return journal->j_chksum_driver != NULL;
+ return jbd2_journal_has_csum_v2or3_feature(journal);
}
static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb)
{
int num_fc_blocks = be32_to_cpu(jsb->s_num_fc_blks);
@@ -1794,26 +1784,11 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal)
#define JBD_MAX_CHECKSUM_SIZE 4
static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
const void *address, unsigned int length)
{
- struct {
- struct shash_desc shash;
- char ctx[JBD_MAX_CHECKSUM_SIZE];
- } desc;
- int err;
-
- BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) >
- JBD_MAX_CHECKSUM_SIZE);
-
- desc.shash.tfm = journal->j_chksum_driver;
- *(u32 *)desc.ctx = crc;
-
- err = crypto_shash_update(&desc.shash, address, length);
- BUG_ON(err);
-
- return *(u32 *)desc.ctx;
+ return crc32c(crc, address, length);
}
/* Return most recent uncommitted transaction */
static inline tid_t jbd2_get_latest_transaction(journal_t *journal)
{
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH v3 16/18] jbd2: switch to using the crc32c library
2024-11-03 22:31 ` [PATCH v3 16/18] jbd2: " Eric Biggers
@ 2024-11-04 16:01 ` Darrick J. Wong
2024-11-04 18:02 ` Eric Biggers
2024-11-04 16:17 ` Theodore Ts'o
1 sibling, 1 reply; 31+ messages in thread
From: Darrick J. Wong @ 2024-11-04 16:01 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-kernel, linux-arch, linux-arm-kernel, linux-crypto,
linux-ext4, linux-f2fs-devel, linux-mips, linux-riscv, linux-s390,
linux-scsi, linuxppc-dev, loongarch, sparclinux, x86,
Ard Biesheuvel
On Sun, Nov 03, 2024 at 02:31:52PM -0800, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
>
> Now that the crc32c() library function directly takes advantage of
> architecture-specific optimizations, it is unnecessary to go through the
> crypto API. Just use crc32c(). This is much simpler, and it improves
> performance due to eliminating the crypto API overhead.
>
> Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
> fs/jbd2/Kconfig | 2 --
> fs/jbd2/journal.c | 25 ++-----------------------
> include/linux/jbd2.h | 31 +++----------------------------
> 3 files changed, 5 insertions(+), 53 deletions(-)
>
> diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
> index 4ad2c67f93f1..9c19e1512101 100644
> --- a/fs/jbd2/Kconfig
> +++ b/fs/jbd2/Kconfig
> @@ -1,11 +1,9 @@
> # SPDX-License-Identifier: GPL-2.0-only
> config JBD2
> tristate
> select CRC32
> - select CRYPTO
> - select CRYPTO_CRC32C
> help
> This is a generic journaling layer for block devices that support
> both 32-bit and 64-bit block numbers. It is currently used by
> the ext4 and OCFS2 filesystems, but it could also be used to add
> journal support to other file systems or block devices such
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index 97f487c3d8fc..56cea5a738a7 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -1373,24 +1373,16 @@ static int journal_check_superblock(journal_t *journal)
> printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
> "at the same time!\n");
> return err;
> }
>
> - /* Load the checksum driver */
> if (jbd2_journal_has_csum_v2or3_feature(journal)) {
> if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) {
> printk(KERN_ERR "JBD2: Unknown checksum type\n");
> return err;
> }
>
> - journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
> - if (IS_ERR(journal->j_chksum_driver)) {
> - printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
> - err = PTR_ERR(journal->j_chksum_driver);
> - journal->j_chksum_driver = NULL;
> - return err;
> - }
> /* Check superblock checksum */
> if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
> printk(KERN_ERR "JBD2: journal checksum error\n");
> err = -EFSBADCRC;
> return err;
> @@ -1611,12 +1603,10 @@ static journal_t *journal_init_common(struct block_device *bdev,
>
> return journal;
>
> err_cleanup:
> percpu_counter_destroy(&journal->j_checkpoint_jh_count);
> - if (journal->j_chksum_driver)
> - crypto_free_shash(journal->j_chksum_driver);
> kfree(journal->j_wbuf);
> jbd2_journal_destroy_revoke(journal);
> journal_fail_superblock(journal);
> kfree(journal);
> return ERR_PTR(err);
> @@ -2194,12 +2184,10 @@ int jbd2_journal_destroy(journal_t *journal)
> if (journal->j_proc_entry)
> jbd2_stats_proc_exit(journal);
> iput(journal->j_inode);
> if (journal->j_revoke)
> jbd2_journal_destroy_revoke(journal);
> - if (journal->j_chksum_driver)
> - crypto_free_shash(journal->j_chksum_driver);
> kfree(journal->j_fc_wbuf);
> kfree(journal->j_wbuf);
> kfree(journal);
>
> return err;
> @@ -2340,23 +2328,14 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
> pr_err("JBD2: Cannot enable fast commits.\n");
> return 0;
> }
> }
>
> - /* Load the checksum driver if necessary */
> - if ((journal->j_chksum_driver == NULL) &&
> - INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
> - journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
> - if (IS_ERR(journal->j_chksum_driver)) {
> - printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
> - journal->j_chksum_driver = NULL;
> - return 0;
> - }
> - /* Precompute checksum seed for all metadata */
> + /* Precompute checksum seed for all metadata */
> + if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3))
> journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
> sizeof(sb->s_uuid));
> - }
>
> lock_buffer(journal->j_sb_buffer);
>
> /* If enabling v3 checksums, update superblock */
> if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index 8aef9bb6ad57..33d25a3d15f1 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -26,11 +26,11 @@
> #include <linux/mutex.h>
> #include <linux/timer.h>
> #include <linux/slab.h>
> #include <linux/bit_spinlock.h>
> #include <linux/blkdev.h>
> -#include <crypto/hash.h>
> +#include <linux/crc32c.h>
> #endif
>
> #define journal_oom_retry 1
>
> /*
> @@ -1239,17 +1239,10 @@ struct journal_s
> * An opaque pointer to fs-private information. ext3 puts its
> * superblock pointer here.
> */
> void *j_private;
>
> - /**
> - * @j_chksum_driver:
> - *
> - * Reference to checksum algorithm driver via cryptoapi.
> - */
> - struct crypto_shash *j_chksum_driver;
> -
> /**
> * @j_csum_seed:
> *
> * Precomputed journal UUID checksum for seeding other checksums.
> */
> @@ -1748,14 +1741,11 @@ static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j)
> return jbd2_has_feature_csum2(j) || jbd2_has_feature_csum3(j);
> }
>
> static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
> {
> - WARN_ON_ONCE(jbd2_journal_has_csum_v2or3_feature(journal) &&
> - journal->j_chksum_driver == NULL);
> -
> - return journal->j_chksum_driver != NULL;
> + return jbd2_journal_has_csum_v2or3_feature(journal);
Same comment as my last reply about removing trivial helpers, but
otherwise
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
If you push this for 6.13 I'd be happy that the shash junk finally went
away. The onstack struct desc thing in the _chksum() functions was by
far the most sketchy part of the ext4/jbd2 metadata csum project. :)
--D
> }
>
> static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb)
> {
> int num_fc_blocks = be32_to_cpu(jsb->s_num_fc_blks);
> @@ -1794,26 +1784,11 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal)
> #define JBD_MAX_CHECKSUM_SIZE 4
>
> static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
> const void *address, unsigned int length)
> {
> - struct {
> - struct shash_desc shash;
> - char ctx[JBD_MAX_CHECKSUM_SIZE];
> - } desc;
> - int err;
> -
> - BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) >
> - JBD_MAX_CHECKSUM_SIZE);
> -
> - desc.shash.tfm = journal->j_chksum_driver;
> - *(u32 *)desc.ctx = crc;
> -
> - err = crypto_shash_update(&desc.shash, address, length);
> - BUG_ON(err);
> -
> - return *(u32 *)desc.ctx;
> + return crc32c(crc, address, length);
> }
>
> /* Return most recent uncommitted transaction */
> static inline tid_t jbd2_get_latest_transaction(journal_t *journal)
> {
> --
> 2.47.0
>
>
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v3 16/18] jbd2: switch to using the crc32c library
2024-11-04 16:01 ` Darrick J. Wong
@ 2024-11-04 18:02 ` Eric Biggers
0 siblings, 0 replies; 31+ messages in thread
From: Eric Biggers @ 2024-11-04 18:02 UTC (permalink / raw)
To: Darrick J. Wong
Cc: linux-kernel, linux-arch, linux-arm-kernel, linux-crypto,
linux-ext4, linux-f2fs-devel, linux-mips, linux-riscv, linux-s390,
linux-scsi, linuxppc-dev, loongarch, sparclinux, x86,
Ard Biesheuvel
On Mon, Nov 04, 2024 at 08:01:36AM -0800, Darrick J. Wong wrote:
> Same comment as my last reply about removing trivial helpers, but
> otherwise
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
>
> If you push this for 6.13 I'd be happy that the shash junk finally went
> away. The onstack struct desc thing in the _chksum() functions was by
> far the most sketchy part of the ext4/jbd2 metadata csum project. :)
It will take a bit longer I'm afraid, since this patchset depends on patches
that are currently enqueued in three different trees for 6.13. My current plan
is to target 6.14, and get this series into into linux-next shortly after the
6.13 merge window closes.
- Eric
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v3 16/18] jbd2: switch to using the crc32c library
2024-11-03 22:31 ` [PATCH v3 16/18] jbd2: " Eric Biggers
2024-11-04 16:01 ` Darrick J. Wong
@ 2024-11-04 16:17 ` Theodore Ts'o
1 sibling, 0 replies; 31+ messages in thread
From: Theodore Ts'o @ 2024-11-04 16:17 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-kernel, linux-arch, linux-arm-kernel, linux-crypto,
linux-ext4, linux-f2fs-devel, linux-mips, linux-riscv, linux-s390,
linux-scsi, linuxppc-dev, loongarch, sparclinux, x86,
Ard Biesheuvel
On Sun, Nov 03, 2024 at 02:31:52PM -0800, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
>
> Now that the crc32c() library function directly takes advantage of
> architecture-specific optimizations, it is unnecessary to go through the
> crypto API. Just use crc32c(). This is much simpler, and it improves
> performance due to eliminating the crypto API overhead.
>
> Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Theodore Ts'o <tytso@mit.edu>
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v3 17/18] f2fs: switch to using the crc32 library
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (15 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 16/18] jbd2: " Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-05 1:34 ` [f2fs-dev] " Chao Yu
2024-11-03 22:31 ` [PATCH v3 18/18] scsi: target: iscsi: switch to using the crc32c library Eric Biggers
17 siblings, 1 reply; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Now that the crc32() library function takes advantage of
architecture-specific optimizations, it is unnecessary to go through the
crypto API. Just use crc32(). This is much simpler, and it improves
performance due to eliminating the crypto API overhead.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
fs/f2fs/Kconfig | 3 +--
fs/f2fs/f2fs.h | 19 +------------------
fs/f2fs/super.c | 15 ---------------
3 files changed, 2 insertions(+), 35 deletions(-)
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 68a1e23e1557..5916a02fb46d 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -2,12 +2,11 @@
config F2FS_FS
tristate "F2FS filesystem support"
depends on BLOCK
select BUFFER_HEAD
select NLS
- select CRYPTO
- select CRYPTO_CRC32
+ select CRC32
select F2FS_FS_XATTR if FS_ENCRYPTION
select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
select FS_IOMAP
select LZ4_COMPRESS if F2FS_FS_LZ4
select LZ4_DECOMPRESS if F2FS_FS_LZ4
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 33f5449dc22d..1fc5c2743c8d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1761,13 +1761,10 @@ struct f2fs_sb_info {
/* For write statistics */
u64 sectors_written_start;
u64 kbytes_written;
- /* Reference to checksum algorithm driver via cryptoapi */
- struct crypto_shash *s_chksum_driver;
-
/* Precomputed FS UUID checksum for seeding other checksums */
__u32 s_chksum_seed;
struct workqueue_struct *post_read_wq; /* post read workqueue */
@@ -1941,25 +1938,11 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi,
* Inline functions
*/
static inline u32 __f2fs_crc32(struct f2fs_sb_info *sbi, u32 crc,
const void *address, unsigned int length)
{
- struct {
- struct shash_desc shash;
- char ctx[4];
- } desc;
- int err;
-
- BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx));
-
- desc.shash.tfm = sbi->s_chksum_driver;
- *(u32 *)desc.ctx = crc;
-
- err = crypto_shash_update(&desc.shash, address, length);
- BUG_ON(err);
-
- return *(u32 *)desc.ctx;
+ return crc32(crc, address, length);
}
static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
unsigned int length)
{
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 87ab5696bd48..003d3bcb0caa 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1670,12 +1670,10 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_destroy_post_read_wq(sbi);
kvfree(sbi->ckpt);
- if (sbi->s_chksum_driver)
- crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->raw_super);
f2fs_destroy_page_array_cache(sbi);
f2fs_destroy_xattr_caches(sbi);
#ifdef CONFIG_QUOTA
@@ -4419,19 +4417,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
INIT_LIST_HEAD(&sbi->inode_list[i]);
spin_lock_init(&sbi->inode_lock[i]);
}
mutex_init(&sbi->flush_lock);
- /* Load the checksum driver */
- sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
- if (IS_ERR(sbi->s_chksum_driver)) {
- f2fs_err(sbi, "Cannot load crc32 driver.");
- err = PTR_ERR(sbi->s_chksum_driver);
- sbi->s_chksum_driver = NULL;
- goto free_sbi;
- }
-
/* set a block size */
if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
f2fs_err(sbi, "unable to set blocksize");
goto free_sbi;
}
@@ -4872,12 +4861,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy);
kvfree(options);
free_sb_buf:
kfree(raw_super);
free_sbi:
- if (sbi->s_chksum_driver)
- crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi);
sb->s_fs_info = NULL;
/* give only one another chance */
if (retry_cnt > 0 && skip_recovery) {
@@ -5080,7 +5067,5 @@ module_init(init_f2fs_fs)
module_exit(exit_f2fs_fs)
MODULE_AUTHOR("Samsung Electronics's Praesto Team");
MODULE_DESCRIPTION("Flash Friendly File System");
MODULE_LICENSE("GPL");
-MODULE_SOFTDEP("pre: crc32");
-
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [f2fs-dev] [PATCH v3 17/18] f2fs: switch to using the crc32 library
2024-11-03 22:31 ` [PATCH v3 17/18] f2fs: switch to using the crc32 library Eric Biggers
@ 2024-11-05 1:34 ` Chao Yu
0 siblings, 0 replies; 31+ messages in thread
From: Chao Yu @ 2024-11-05 1:34 UTC (permalink / raw)
To: Eric Biggers, linux-kernel
Cc: Chao Yu, linux-arch, linux-s390, linux-scsi, x86, linux-mips,
linux-f2fs-devel, linux-crypto, loongarch, sparclinux,
linux-riscv, linux-ext4, linuxppc-dev, Ard Biesheuvel,
linux-arm-kernel
On 2024/11/4 6:31, Eric Biggers via Linux-f2fs-devel wrote:
> From: Eric Biggers <ebiggers@google.com>
>
> Now that the crc32() library function takes advantage of
> architecture-specific optimizations, it is unnecessary to go through the
> crypto API. Just use crc32(). This is much simpler, and it improves
> performance due to eliminating the crypto API overhead.
>
> Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Chao Yu <chao@kernel.org>
Thanks,
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v3 18/18] scsi: target: iscsi: switch to using the crc32c library
2024-11-03 22:31 [PATCH v3 00/18] Wire up CRC32 library functions to arch-optimized code Eric Biggers
` (16 preceding siblings ...)
2024-11-03 22:31 ` [PATCH v3 17/18] f2fs: switch to using the crc32 library Eric Biggers
@ 2024-11-03 22:31 ` Eric Biggers
2024-11-05 1:33 ` Martin K. Petersen
17 siblings, 1 reply; 31+ messages in thread
From: Eric Biggers @ 2024-11-03 22:31 UTC (permalink / raw)
To: linux-kernel
Cc: linux-arch, linux-arm-kernel, linux-crypto, linux-ext4,
linux-f2fs-devel, linux-mips, linux-riscv, linux-s390, linux-scsi,
linuxppc-dev, loongarch, sparclinux, x86, Ard Biesheuvel
From: Eric Biggers <ebiggers@google.com>
Now that the crc32c() library function directly takes advantage of
architecture-specific optimizations, it is unnecessary to go through the
crypto API. Just use crc32c(). This is much simpler, and it improves
performance due to eliminating the crypto API overhead.
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
drivers/target/iscsi/Kconfig | 3 +-
drivers/target/iscsi/iscsi_target.c | 153 +++++++---------------
drivers/target/iscsi/iscsi_target_login.c | 50 -------
drivers/target/iscsi/iscsi_target_login.h | 1 -
drivers/target/iscsi/iscsi_target_nego.c | 21 +--
include/target/iscsi/iscsi_target_core.h | 3 -
6 files changed, 49 insertions(+), 182 deletions(-)
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
index 1c0517a12571..70d76f3dd693 100644
--- a/drivers/target/iscsi/Kconfig
+++ b/drivers/target/iscsi/Kconfig
@@ -1,11 +1,12 @@
# SPDX-License-Identifier: GPL-2.0-only
config ISCSI_TARGET
tristate "SCSI Target Mode Stack"
depends on INET
+ select CRC32
select CRYPTO
- select CRYPTO_CRC32C
+ select CRYPTO_HASH
help
Say M to enable the SCSI target mode stack. A SCSI target mode stack
is software that makes local storage available over a storage network
to a SCSI initiator system. The supported storage network technologies
include iSCSI, Fibre Channel and the SCSI RDMA Protocol (SRP).
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 6002283cbeba..091c1efccfb7 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -6,11 +6,11 @@
*
* Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
*
******************************************************************************/
-#include <crypto/hash.h>
+#include <linux/crc32c.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
@@ -488,12 +488,12 @@ void iscsit_aborted_task(struct iscsit_conn *conn, struct iscsit_cmd *cmd)
__iscsit_free_cmd(cmd, true);
}
EXPORT_SYMBOL(iscsit_aborted_task);
-static void iscsit_do_crypto_hash_buf(struct ahash_request *, const void *,
- u32, u32, const void *, void *);
+static u32 iscsit_crc_buf(const void *buf, u32 payload_length,
+ u32 padding, const void *pad_bytes);
static void iscsit_tx_thread_wait_for_tcp(struct iscsit_conn *);
static int
iscsit_xmit_nondatain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
const void *data_buf, u32 data_buf_len)
@@ -508,13 +508,11 @@ iscsit_xmit_nondatain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
iov[niov++].iov_len = ISCSI_HDR_LEN;
if (conn->conn_ops->HeaderDigest) {
u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
- iscsit_do_crypto_hash_buf(conn->conn_tx_hash, hdr,
- ISCSI_HDR_LEN, 0, NULL,
- header_digest);
+ *header_digest = iscsit_crc_buf(hdr, ISCSI_HDR_LEN, 0, NULL);
iov[0].iov_len += ISCSI_CRC_LEN;
tx_size += ISCSI_CRC_LEN;
pr_debug("Attaching CRC32C HeaderDigest"
" to opcode 0x%x 0x%08x\n",
@@ -535,15 +533,13 @@ iscsit_xmit_nondatain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
pr_debug("Attaching %u additional"
" padding bytes.\n", padding);
}
if (conn->conn_ops->DataDigest) {
- iscsit_do_crypto_hash_buf(conn->conn_tx_hash,
- data_buf, data_buf_len,
- padding, &cmd->pad_bytes,
- &cmd->data_crc);
-
+ cmd->data_crc = iscsit_crc_buf(data_buf, data_buf_len,
+ padding,
+ &cmd->pad_bytes);
iov[niov].iov_base = &cmd->data_crc;
iov[niov++].iov_len = ISCSI_CRC_LEN;
tx_size += ISCSI_CRC_LEN;
pr_debug("Attached DataDigest for %u"
" bytes opcode 0x%x, CRC 0x%08x\n",
@@ -564,12 +560,12 @@ iscsit_xmit_nondatain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
}
static int iscsit_map_iovec(struct iscsit_cmd *cmd, struct kvec *iov, int nvec,
u32 data_offset, u32 data_length);
static void iscsit_unmap_iovec(struct iscsit_cmd *);
-static u32 iscsit_do_crypto_hash_sg(struct ahash_request *, struct iscsit_cmd *,
- u32, u32, u32, u8 *);
+static u32 iscsit_crc_sglist(const struct iscsit_cmd *cmd, u32 data_length,
+ u32 padding, const u8 *pad_bytes);
static int
iscsit_xmit_datain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
const struct iscsi_datain *datain)
{
struct kvec *iov;
@@ -582,14 +578,12 @@ iscsit_xmit_datain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
tx_size += ISCSI_HDR_LEN;
if (conn->conn_ops->HeaderDigest) {
u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
- iscsit_do_crypto_hash_buf(conn->conn_tx_hash, cmd->pdu,
- ISCSI_HDR_LEN, 0, NULL,
- header_digest);
-
+ *header_digest = iscsit_crc_buf(cmd->pdu, ISCSI_HDR_LEN, 0,
+ NULL);
iov[0].iov_len += ISCSI_CRC_LEN;
tx_size += ISCSI_CRC_LEN;
pr_debug("Attaching CRC32 HeaderDigest for DataIN PDU 0x%08x\n",
*header_digest);
@@ -612,16 +606,12 @@ iscsit_xmit_datain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
pr_debug("Attaching %u padding bytes\n", cmd->padding);
}
if (conn->conn_ops->DataDigest) {
- cmd->data_crc = iscsit_do_crypto_hash_sg(conn->conn_tx_hash,
- cmd, datain->offset,
- datain->length,
- cmd->padding,
- cmd->pad_bytes);
-
+ cmd->data_crc = iscsit_crc_sglist(cmd, datain->length,
+ cmd->padding, cmd->pad_bytes);
iov[iov_count].iov_base = &cmd->data_crc;
iov[iov_count++].iov_len = ISCSI_CRC_LEN;
tx_size += ISCSI_CRC_LEN;
pr_debug("Attached CRC32C DataDigest %d bytes, crc 0x%08x\n",
@@ -1402,81 +1392,49 @@ iscsit_handle_scsi_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
return 0;
return iscsit_get_immediate_data(cmd, hdr, dump_payload);
}
-static u32 iscsit_do_crypto_hash_sg(
- struct ahash_request *hash,
- struct iscsit_cmd *cmd,
- u32 data_offset,
- u32 data_length,
- u32 padding,
- u8 *pad_bytes)
+static u32 iscsit_crc_sglist(const struct iscsit_cmd *cmd, u32 data_length,
+ u32 padding, const u8 *pad_bytes)
{
- u32 data_crc;
- struct scatterlist *sg;
- unsigned int page_off;
-
- crypto_ahash_init(hash);
-
- sg = cmd->first_data_sg;
- page_off = cmd->first_data_sg_off;
-
- if (data_length && page_off) {
- struct scatterlist first_sg;
- u32 len = min_t(u32, data_length, sg->length - page_off);
-
- sg_init_table(&first_sg, 1);
- sg_set_page(&first_sg, sg_page(sg), len, sg->offset + page_off);
-
- ahash_request_set_crypt(hash, &first_sg, NULL, len);
- crypto_ahash_update(hash);
-
- data_length -= len;
- sg = sg_next(sg);
- }
+ struct scatterlist *sg = cmd->first_data_sg;
+ unsigned int page_off = cmd->first_data_sg_off;
+ u32 crc = ~0;
while (data_length) {
- u32 cur_len = min_t(u32, data_length, sg->length);
+ u32 cur_len = min_t(u32, data_length, sg->length - page_off);
+ const void *virt;
- ahash_request_set_crypt(hash, sg, NULL, cur_len);
- crypto_ahash_update(hash);
+ virt = kmap_local_page(sg_page(sg)) + sg->offset + page_off;
+ crc = crc32c(crc, virt, cur_len);
+ kunmap_local(virt);
- data_length -= cur_len;
/* iscsit_map_iovec has already checked for invalid sg pointers */
sg = sg_next(sg);
- }
- if (padding) {
- struct scatterlist pad_sg;
-
- sg_init_one(&pad_sg, pad_bytes, padding);
- ahash_request_set_crypt(hash, &pad_sg, (u8 *)&data_crc,
- padding);
- crypto_ahash_finup(hash);
- } else {
- ahash_request_set_crypt(hash, NULL, (u8 *)&data_crc, 0);
- crypto_ahash_final(hash);
+ page_off = 0;
+ data_length -= cur_len;
}
- return data_crc;
+ if (padding)
+ crc = crc32c(crc, pad_bytes, padding);
+
+ return ~crc;
}
-static void iscsit_do_crypto_hash_buf(struct ahash_request *hash,
- const void *buf, u32 payload_length, u32 padding,
- const void *pad_bytes, void *data_crc)
+static u32 iscsit_crc_buf(const void *buf, u32 payload_length,
+ u32 padding, const void *pad_bytes)
{
- struct scatterlist sg[2];
+ u32 crc = ~0;
- sg_init_table(sg, ARRAY_SIZE(sg));
- sg_set_buf(sg, buf, payload_length);
- if (padding)
- sg_set_buf(sg + 1, pad_bytes, padding);
+ crc = crc32c(crc, buf, payload_length);
- ahash_request_set_crypt(hash, sg, data_crc, payload_length + padding);
+ if (padding)
+ crc = crc32c(crc, pad_bytes, padding);
- crypto_ahash_digest(hash);
+ return ~crc;
}
int
__iscsit_check_dataout_hdr(struct iscsit_conn *conn, void *buf,
struct iscsit_cmd *cmd, u32 payload_length,
@@ -1660,15 +1618,12 @@ iscsit_get_dataout(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
return -1;
if (conn->conn_ops->DataDigest) {
u32 data_crc;
- data_crc = iscsit_do_crypto_hash_sg(conn->conn_rx_hash, cmd,
- be32_to_cpu(hdr->offset),
- payload_length, padding,
- cmd->pad_bytes);
-
+ data_crc = iscsit_crc_sglist(cmd, payload_length, padding,
+ cmd->pad_bytes);
if (checksum != data_crc) {
pr_err("ITT: 0x%08x, Offset: %u, Length: %u,"
" DataSN: 0x%08x, CRC32C DataDigest 0x%08x"
" does not match computed 0x%08x\n",
hdr->itt, hdr->offset, payload_length,
@@ -1923,14 +1878,12 @@ static int iscsit_handle_nop_out(struct iscsit_conn *conn, struct iscsit_cmd *cm
ret = -1;
goto out;
}
if (conn->conn_ops->DataDigest) {
- iscsit_do_crypto_hash_buf(conn->conn_rx_hash, ping_data,
- payload_length, padding,
- cmd->pad_bytes, &data_crc);
-
+ data_crc = iscsit_crc_buf(ping_data, payload_length,
+ padding, cmd->pad_bytes);
if (checksum != data_crc) {
pr_err("Ping data CRC32C DataDigest"
" 0x%08x does not match computed 0x%08x\n",
checksum, data_crc);
if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
@@ -2326,14 +2279,11 @@ iscsit_handle_text_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
rx_got = rx_data(conn, &iov[0], niov, rx_size);
if (rx_got != rx_size)
goto reject;
if (conn->conn_ops->DataDigest) {
- iscsit_do_crypto_hash_buf(conn->conn_rx_hash,
- text_in, rx_size, 0, NULL,
- &data_crc);
-
+ data_crc = iscsit_crc_buf(text_in, rx_size, 0, NULL);
if (checksum != data_crc) {
pr_err("Text data CRC32C DataDigest"
" 0x%08x does not match computed"
" 0x%08x\n", checksum, data_crc);
if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
@@ -2686,14 +2636,12 @@ static int iscsit_handle_immediate_data(
}
if (conn->conn_ops->DataDigest) {
u32 data_crc;
- data_crc = iscsit_do_crypto_hash_sg(conn->conn_rx_hash, cmd,
- cmd->write_data_done, length, padding,
- cmd->pad_bytes);
-
+ data_crc = iscsit_crc_sglist(cmd, length, padding,
+ cmd->pad_bytes);
if (checksum != data_crc) {
pr_err("ImmediateData CRC32C DataDigest 0x%08x"
" does not match computed 0x%08x\n", checksum,
data_crc);
@@ -4114,14 +4062,12 @@ static void iscsit_get_rx_pdu(struct iscsit_conn *conn)
if (ret != ISCSI_CRC_LEN) {
iscsit_rx_thread_wait_for_tcp(conn);
break;
}
- iscsit_do_crypto_hash_buf(conn->conn_rx_hash, buffer,
- ISCSI_HDR_LEN, 0, NULL,
- &checksum);
-
+ checksum = iscsit_crc_buf(buffer, ISCSI_HDR_LEN, 0,
+ NULL);
if (digest != checksum) {
pr_err("HeaderDigest CRC32C failed,"
" received 0x%08x, computed 0x%08x\n",
digest, checksum);
/*
@@ -4404,19 +4350,10 @@ int iscsit_close_connection(
* If any other processes are accessing this connection pointer we
* must wait until they have completed.
*/
iscsit_check_conn_usage_count(conn);
- ahash_request_free(conn->conn_tx_hash);
- if (conn->conn_rx_hash) {
- struct crypto_ahash *tfm;
-
- tfm = crypto_ahash_reqtfm(conn->conn_rx_hash);
- ahash_request_free(conn->conn_rx_hash);
- crypto_free_ahash(tfm);
- }
-
if (conn->sock)
sock_release(conn->sock);
if (conn->conn_transport->iscsit_free_conn)
conn->conn_transport->iscsit_free_conn(conn);
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 90b870f234f0..c2ac9a99ebbb 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -6,11 +6,10 @@
*
* Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
*
******************************************************************************/
-#include <crypto/hash.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <linux/sched/signal.h>
#include <linux/idr.h>
@@ -69,50 +68,10 @@ static struct iscsi_login *iscsi_login_init_conn(struct iscsit_conn *conn)
out_login:
kfree(login);
return NULL;
}
-/*
- * Used by iscsi_target_nego.c:iscsi_target_locate_portal() to setup
- * per struct iscsit_conn libcrypto contexts for crc32c and crc32-intel
- */
-int iscsi_login_setup_crypto(struct iscsit_conn *conn)
-{
- struct crypto_ahash *tfm;
-
- /*
- * Setup slicing by CRC32C algorithm for RX and TX libcrypto contexts
- * which will default to crc32c_intel.ko for cpu_has_xmm4_2, or fallback
- * to software 1x8 byte slicing from crc32c.ko
- */
- tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm)) {
- pr_err("crypto_alloc_ahash() failed\n");
- return -ENOMEM;
- }
-
- conn->conn_rx_hash = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!conn->conn_rx_hash) {
- pr_err("ahash_request_alloc() failed for conn_rx_hash\n");
- crypto_free_ahash(tfm);
- return -ENOMEM;
- }
- ahash_request_set_callback(conn->conn_rx_hash, 0, NULL, NULL);
-
- conn->conn_tx_hash = ahash_request_alloc(tfm, GFP_KERNEL);
- if (!conn->conn_tx_hash) {
- pr_err("ahash_request_alloc() failed for conn_tx_hash\n");
- ahash_request_free(conn->conn_rx_hash);
- conn->conn_rx_hash = NULL;
- crypto_free_ahash(tfm);
- return -ENOMEM;
- }
- ahash_request_set_callback(conn->conn_tx_hash, 0, NULL, NULL);
-
- return 0;
-}
-
static int iscsi_login_check_initiator_version(
struct iscsit_conn *conn,
u8 version_max,
u8 version_min)
{
@@ -1163,19 +1122,10 @@ void iscsi_target_login_sess_out(struct iscsit_conn *conn,
} else
spin_unlock_bh(&conn->sess->conn_lock);
iscsit_dec_session_usage_count(conn->sess);
}
- ahash_request_free(conn->conn_tx_hash);
- if (conn->conn_rx_hash) {
- struct crypto_ahash *tfm;
-
- tfm = crypto_ahash_reqtfm(conn->conn_rx_hash);
- ahash_request_free(conn->conn_rx_hash);
- crypto_free_ahash(tfm);
- }
-
if (conn->param_list) {
iscsi_release_param_list(conn->param_list);
conn->param_list = NULL;
}
iscsi_target_nego_release(conn);
diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h
index e8760735486b..03c7d695d58f 100644
--- a/drivers/target/iscsi/iscsi_target_login.h
+++ b/drivers/target/iscsi/iscsi_target_login.h
@@ -7,11 +7,10 @@
struct iscsit_conn;
struct iscsi_login;
struct iscsi_np;
struct sockaddr_storage;
-extern int iscsi_login_setup_crypto(struct iscsit_conn *);
extern int iscsi_check_for_session_reinstatement(struct iscsit_conn *);
extern int iscsi_login_post_auth_non_zero_tsih(struct iscsit_conn *, u16, u32);
extern int iscsit_setup_np(struct iscsi_np *,
struct sockaddr_storage *);
extern int iscsi_target_setup_login_socket(struct iscsi_np *,
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index fa3fb5f4e6bc..16e3ded98c32 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -1192,18 +1192,11 @@ int iscsi_target_locate_portal(
if (!sessiontype) {
if (!login->leading_connection)
goto get_target;
sess->sess_ops->SessionType = 1;
- /*
- * Setup crc32c modules from libcrypto
- */
- if (iscsi_login_setup_crypto(conn) < 0) {
- pr_err("iscsi_login_setup_crypto() failed\n");
- ret = -1;
- goto out;
- }
+
/*
* Serialize access across the discovery struct iscsi_portal_group to
* process login attempt.
*/
conn->tpg = iscsit_global->discovery_tpg;
@@ -1256,21 +1249,11 @@ int iscsi_target_locate_portal(
ret = -1;
goto out;
}
conn->tpg_np = tpg_np;
pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt);
- /*
- * Setup crc32c modules from libcrypto
- */
- if (iscsi_login_setup_crypto(conn) < 0) {
- pr_err("iscsi_login_setup_crypto() failed\n");
- kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put);
- iscsit_put_tiqn_for_login(tiqn);
- conn->tpg = NULL;
- ret = -1;
- goto out;
- }
+
/*
* Serialize access across the struct iscsi_portal_group to
* process login attempt.
*/
if (iscsit_access_np(np, conn->tpg) < 0) {
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 60af7c63b34e..51ca80abacf7 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -574,13 +574,10 @@ struct iscsit_conn {
spinlock_t nopin_timer_lock;
spinlock_t response_queue_lock;
spinlock_t state_lock;
spinlock_t login_timer_lock;
spinlock_t login_worker_lock;
- /* libcrypto RX and TX contexts for crc32c */
- struct ahash_request *conn_rx_hash;
- struct ahash_request *conn_tx_hash;
/* Used for scheduling TX and RX connection kthreads */
cpumask_var_t conn_cpumask;
cpumask_var_t allowed_cpumask;
unsigned int conn_rx_reset_cpumask:1;
unsigned int conn_tx_reset_cpumask:1;
--
2.47.0
^ permalink raw reply related [flat|nested] 31+ messages in thread
* Re: [PATCH v3 18/18] scsi: target: iscsi: switch to using the crc32c library
2024-11-03 22:31 ` [PATCH v3 18/18] scsi: target: iscsi: switch to using the crc32c library Eric Biggers
@ 2024-11-05 1:33 ` Martin K. Petersen
0 siblings, 0 replies; 31+ messages in thread
From: Martin K. Petersen @ 2024-11-05 1:33 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-kernel, linux-arch, linux-arm-kernel, linux-crypto,
linux-ext4, linux-f2fs-devel, linux-mips, linux-riscv, linux-s390,
linux-scsi, linuxppc-dev, loongarch, sparclinux, x86,
Ard Biesheuvel
Eric,
> Now that the crc32c() library function directly takes advantage of
> architecture-specific optimizations, it is unnecessary to go through
> the crypto API. Just use crc32c(). This is much simpler, and it
> improves performance due to eliminating the crypto API overhead.
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply [flat|nested] 31+ messages in thread