From: Eric Biggers <ebiggers@kernel.org>
To: linux-kernel@vger.kernel.org
Cc: linux-crypto@vger.kernel.org, x86@kernel.org,
Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH 6/6] x86/crc32: implement crc32_be using new template
Date: Sun, 24 Nov 2024 20:11:29 -0800 [thread overview]
Message-ID: <20241125041129.192999-7-ebiggers@kernel.org> (raw)
In-Reply-To: <20241125041129.192999-1-ebiggers@kernel.org>
From: Eric Biggers <ebiggers@google.com>
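crc32_be was previously unoptimized on x86. Optimize it using the new
template. This improves throughput by over 25x for long inputs (e.g. from
3084 MB/s to 89333 MB/s at 16384 bytes); inputs of 16 bytes or fewer are
roughly unchanged or slightly slower.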
Benchmark results on AMD Ryzen 9 9950X (Zen 5) using crc_kunit:
Length Before After
------ ------ -----
1 389 MB/s 325 MB/s
16 2845 MB/s 2911 MB/s
64 3012 MB/s 6513 MB/s
127 2567 MB/s 9057 MB/s
128 3048 MB/s 11589 MB/s
200 3070 MB/s 14042 MB/s
256 3067 MB/s 20454 MB/s
511 2938 MB/s 26245 MB/s
512 3081 MB/s 36926 MB/s
1024 3090 MB/s 61914 MB/s
3173 3065 MB/s 76201 MB/s
4096 3084 MB/s 82547 MB/s
16384 3084 MB/s 89333 MB/s
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/lib/crc-pclmul-consts.h | 49 +++++++++++++++++++++++++++++++-
arch/x86/lib/crc32-glue.c | 4 +++
arch/x86/lib/crc32-pclmul.S | 1 +
3 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/arch/x86/lib/crc-pclmul-consts.h b/arch/x86/lib/crc-pclmul-consts.h
index c3ca689eae3b8..f8af6e9278c83 100644
--- a/arch/x86/lib/crc-pclmul-consts.h
+++ b/arch/x86/lib/crc-pclmul-consts.h
@@ -1,10 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* CRC constants generated by:
*
- * ./scripts/crc/gen-crc-consts.py x86_pclmul crc16_msb_0x8bb7,crc32_lsb_0xedb88320
+ * ./scripts/crc/gen-crc-consts.py x86_pclmul crc16_msb_0x8bb7,crc32_lsb_0xedb88320,crc32_msb_0x04c11db7
*
* Do not edit manually.
*/
/*
@@ -97,5 +97,52 @@ static const struct {
0xb4e5b025f7011641, /* floor(x^95 / G(x)) */
0x1db710641, /* G(x) */
},
.extract_crc_mask = {0, 0xffffffff},
};
+
+/*
+ * CRC folding constants generated for most-significant-bit-first CRC-32 using
+ * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
+ * x^5 + x^4 + x^2 + x + 1
+ */
+static const struct {
+ u8 bswap_mask[16];
+ u64 fold_across_2048_bits_consts[2];
+ u64 fold_across_1024_bits_consts[2];
+ u64 fold_across_512_bits_consts[2];
+ u64 fold_across_256_bits_consts[2];
+ u64 fold_across_128_bits_consts[2];
+ u8 shuf_table[48];
+ u64 barrett_reduction_consts[2];
+} crc32_msb_0x04c11db7_consts __cacheline_aligned __maybe_unused = {
+ .bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
+ .fold_across_2048_bits_consts = {
+ 0x88fe2237, /* x^(2048+0) mod G(x) */
+ 0xcbcf3bcb, /* x^(2048+64) mod G(x) */
+ },
+ .fold_across_1024_bits_consts = {
+ 0x567fddeb, /* x^(1024+0) mod G(x) */
+ 0x10bd4d7c, /* x^(1024+64) mod G(x) */
+ },
+ .fold_across_512_bits_consts = {
+ 0xe6228b11, /* x^(512+0) mod G(x) */
+ 0x8833794c, /* x^(512+64) mod G(x) */
+ },
+ .fold_across_256_bits_consts = {
+ 0x75be46b7, /* x^(256+0) mod G(x) */
+ 0x569700e5, /* x^(256+64) mod G(x) */
+ },
+ .fold_across_128_bits_consts = {
+ 0xe8a45605, /* x^(128+0) mod G(x) */
+ 0xc5b9cd4c, /* x^(128+64) mod G(x) */
+ },
+ .shuf_table = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ },
+ .barrett_reduction_consts = {
+ 0x04d101df481b4e5a, /* floor(x^96 / G(x)) - x^64 */
+ 0x104c11db7, /* G(x) */
+ },
+};
diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c
index afcdeee429664..326261e503b42 100644
--- a/arch/x86/lib/crc32-glue.c
+++ b/arch/x86/lib/crc32-glue.c
@@ -18,10 +18,11 @@
static DEFINE_STATIC_KEY_FALSE(have_crc32);
static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);
+DECLARE_CRC_PCLMUL_FUNCS(crc32_msb, u32);
u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts,
have_pclmulqdq, IS_ENABLED(CONFIG_CRC32_SLICEBY8));
@@ -69,10 +70,12 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
}
EXPORT_SYMBOL(crc32c_le_arch);
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
+ CRC_PCLMUL(crc, p, len, crc32_msb, crc32_msb_0x04c11db7_consts,
+ have_pclmulqdq, IS_ENABLED(CONFIG_CRC32_SLICEBY8));
return crc32_be_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_be_arch);
static int __init crc32_x86_init(void)
@@ -80,10 +83,11 @@ static int __init crc32_x86_init(void)
if (boot_cpu_has(X86_FEATURE_XMM4_2))
static_branch_enable(&have_crc32);
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
static_branch_enable(&have_pclmulqdq);
INIT_CRC_PCLMUL(crc32_lsb);
+ INIT_CRC_PCLMUL(crc32_msb);
}
return 0;
}
arch_initcall(crc32_x86_init);
diff --git a/arch/x86/lib/crc32-pclmul.S b/arch/x86/lib/crc32-pclmul.S
index cf07d571ae864..d562944211d4d 100644
--- a/arch/x86/lib/crc32-pclmul.S
+++ b/arch/x86/lib/crc32-pclmul.S
@@ -2,5 +2,6 @@
// Copyright 2024 Google LLC
#include "crc-pclmul-template.S"
DEFINE_CRC_PCLMUL_FUNCS(crc32_lsb, /* bits= */ 32, /* lsb= */ 1)
+DEFINE_CRC_PCLMUL_FUNCS(crc32_msb, /* bits= */ 32, /* lsb= */ 0)
--
2.47.0
next prev parent reply other threads:[~2024-11-25 4:12 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-25 4:11 [PATCH 0/6] x86: new optimized CRC functions, with VPCLMULQDQ support Eric Biggers
2024-11-25 4:11 ` [PATCH 1/6] x86: move zmm exclusion list into CPU feature flag Eric Biggers
2024-11-25 8:33 ` Ingo Molnar
2024-11-25 18:08 ` Eric Biggers
2024-11-25 20:25 ` Ingo Molnar
2024-11-25 4:11 ` [PATCH 2/6] scripts/crc: add gen-crc-consts.py Eric Biggers
2024-11-29 16:09 ` Ard Biesheuvel
2024-11-29 17:47 ` Eric Biggers
2024-11-29 18:33 ` Ard Biesheuvel
2024-11-25 4:11 ` [PATCH 3/6] x86/crc: add "template" for [V]PCLMULQDQ based CRC functions Eric Biggers
2024-11-25 4:11 ` [PATCH 4/6] x86/crc32: implement crc32_le using new template Eric Biggers
2024-11-25 4:11 ` [PATCH 5/6] x86/crc-t10dif: implement crc_t10dif " Eric Biggers
2024-11-25 4:11 ` Eric Biggers [this message]
2024-11-29 16:16 ` [PATCH 0/6] x86: new optimized CRC functions, with VPCLMULQDQ support Ard Biesheuvel
2024-11-29 17:50 ` Eric Biggers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241125041129.192999-7-ebiggers@kernel.org \
--to=ebiggers@kernel.org \
--cc=ardb@kernel.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes; see the mirroring
instructions for how to clone and mirror all data and code used by this
external index.