* [PATCH 01/10] lib/crypto: blake2s: Adjust parameter order of blake2s()
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
@ 2025-10-18 4:30 ` Eric Biggers
2025-10-19 14:36 ` Jason A. Donenfeld
2025-10-18 4:30 ` [PATCH 02/10] lib/crypto: blake2s: Rename blake2s_state to blake2s_ctx Eric Biggers
` (10 subsequent siblings)
11 siblings, 1 reply; 20+ messages in thread
From: Eric Biggers @ 2025-10-18 4:30 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld, Eric Biggers
Reorder the parameters of blake2s() from (out, in, key, outlen, inlen,
keylen) to (key, keylen, in, inlen, out, outlen).
This aligns BLAKE2s with the common conventions of pairing buffers and
their lengths, and having outputs follow inputs. This is widely used
elsewhere in lib/crypto/ and crypto/, and even elsewhere in the BLAKE2s
code itself such as blake2s_init_key() and blake2s_final(). So
blake2s() was a bit of an exception.
Notably, this results in the same order as hmac_*_usingrawkey().
Note that since the type signature changed, it's not possible for a
blake2s() call site to be silently missed.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
drivers/char/random.c | 4 ++--
drivers/net/wireguard/cookie.c | 4 ++--
drivers/net/wireguard/noise.c | 4 ++--
include/crypto/blake2s.h | 6 +++---
lib/crypto/tests/blake2s_kunit.c | 16 ++++++++--------
5 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/drivers/char/random.c b/drivers/char/random.c
index b8b24b6ed3fe4..422c5c76571b9 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -699,21 +699,21 @@ static void extract_entropy(void *buf, size_t len)
/* seed = HASHPRF(last_key, entropy_input) */
blake2s_final(&input_pool.hash, seed);
/* next_key = HASHPRF(seed, RDSEED || 0) */
block.counter = 0;
- blake2s(next_key, (u8 *)&block, seed, sizeof(next_key), sizeof(block), sizeof(seed));
+ blake2s(seed, sizeof(seed), (const u8 *)&block, sizeof(block), next_key, sizeof(next_key));
blake2s_init_key(&input_pool.hash, BLAKE2S_HASH_SIZE, next_key, sizeof(next_key));
spin_unlock_irqrestore(&input_pool.lock, flags);
memzero_explicit(next_key, sizeof(next_key));
while (len) {
i = min_t(size_t, len, BLAKE2S_HASH_SIZE);
/* output = HASHPRF(seed, RDSEED || ++counter) */
++block.counter;
- blake2s(buf, (u8 *)&block, seed, i, sizeof(block), sizeof(seed));
+ blake2s(seed, sizeof(seed), (const u8 *)&block, sizeof(block), buf, i);
len -= i;
buf += i;
}
memzero_explicit(seed, sizeof(seed));
diff --git a/drivers/net/wireguard/cookie.c b/drivers/net/wireguard/cookie.c
index 94d0a7206084e..be1b83aae03bf 100644
--- a/drivers/net/wireguard/cookie.c
+++ b/drivers/net/wireguard/cookie.c
@@ -75,19 +75,19 @@ void wg_cookie_init(struct cookie *cookie)
static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len,
const u8 key[NOISE_SYMMETRIC_KEY_LEN])
{
len = len - sizeof(struct message_macs) +
offsetof(struct message_macs, mac1);
- blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN);
+ blake2s(key, NOISE_SYMMETRIC_KEY_LEN, message, len, mac1, COOKIE_LEN);
}
static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len,
const u8 cookie[COOKIE_LEN])
{
len = len - sizeof(struct message_macs) +
offsetof(struct message_macs, mac2);
- blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN);
+ blake2s(cookie, COOKIE_LEN, message, len, mac2, COOKIE_LEN);
}
static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb,
struct cookie_checker *checker)
{
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
index 7eb9a23a3d4d9..306abb876c805 100644
--- a/drivers/net/wireguard/noise.c
+++ b/drivers/net/wireguard/noise.c
@@ -33,12 +33,12 @@ static atomic64_t keypair_counter = ATOMIC64_INIT(0);
void __init wg_noise_init(void)
{
struct blake2s_state blake;
- blake2s(handshake_init_chaining_key, handshake_name, NULL,
- NOISE_HASH_LEN, sizeof(handshake_name), 0);
+ blake2s(NULL, 0, handshake_name, sizeof(handshake_name),
+ handshake_init_chaining_key, NOISE_HASH_LEN);
blake2s_init(&blake, NOISE_HASH_LEN);
blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN);
blake2s_update(&blake, identifier_name, sizeof(identifier_name));
blake2s_final(&blake, handshake_init_hash);
}
diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
index f9ffd39194eb8..a7dd678725b27 100644
--- a/include/crypto/blake2s.h
+++ b/include/crypto/blake2s.h
@@ -84,13 +84,13 @@ static inline void blake2s_init_key(struct blake2s_state *state,
}
void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen);
void blake2s_final(struct blake2s_state *state, u8 *out);
-static inline void blake2s(u8 *out, const u8 *in, const u8 *key,
- const size_t outlen, const size_t inlen,
- const size_t keylen)
+static inline void blake2s(const u8 *key, const size_t keylen,
+ const u8 *in, const size_t inlen,
+ u8 *out, const size_t outlen)
{
struct blake2s_state state;
WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
diff --git a/lib/crypto/tests/blake2s_kunit.c b/lib/crypto/tests/blake2s_kunit.c
index 057c40132246f..247bbdf7dc864 100644
--- a/lib/crypto/tests/blake2s_kunit.c
+++ b/lib/crypto/tests/blake2s_kunit.c
@@ -12,11 +12,11 @@
*/
static void blake2s_default(const u8 *data, size_t len,
u8 out[BLAKE2S_HASH_SIZE])
{
- blake2s(out, data, NULL, BLAKE2S_HASH_SIZE, len, 0);
+ blake2s(NULL, 0, data, len, out, BLAKE2S_HASH_SIZE);
}
static void blake2s_init_default(struct blake2s_state *state)
{
blake2s_init(state, BLAKE2S_HASH_SIZE);
@@ -50,11 +50,11 @@ static void test_blake2s_all_key_and_hash_lens(struct kunit *test)
rand_bytes_seeded_from_len(data, data_len);
blake2s_init(&main_state, BLAKE2S_HASH_SIZE);
for (int key_len = 0; key_len <= BLAKE2S_KEY_SIZE; key_len++) {
rand_bytes_seeded_from_len(key, key_len);
for (int out_len = 1; out_len <= BLAKE2S_HASH_SIZE; out_len++) {
- blake2s(hash, data, key, out_len, data_len, key_len);
+ blake2s(key, key_len, data, data_len, hash, out_len);
blake2s_update(&main_state, hash, out_len);
}
}
blake2s_final(&main_state, main_hash);
KUNIT_ASSERT_MEMEQ(test, main_hash, blake2s_keyed_testvec_consolidated,
@@ -78,14 +78,14 @@ static void test_blake2s_with_guarded_key_buf(struct kunit *test)
struct blake2s_state state;
rand_bytes(key, key_len);
memcpy(guarded_key, key, key_len);
- blake2s(hash1, test_buf, key,
- BLAKE2S_HASH_SIZE, data_len, key_len);
- blake2s(hash2, test_buf, guarded_key,
- BLAKE2S_HASH_SIZE, data_len, key_len);
+ blake2s(key, key_len, test_buf, data_len,
+ hash1, BLAKE2S_HASH_SIZE);
+ blake2s(guarded_key, key_len, test_buf, data_len,
+ hash2, BLAKE2S_HASH_SIZE);
KUNIT_ASSERT_MEMEQ(test, hash1, hash2, BLAKE2S_HASH_SIZE);
blake2s_init_key(&state, BLAKE2S_HASH_SIZE,
guarded_key, key_len);
blake2s_update(&state, test_buf, data_len);
@@ -105,12 +105,12 @@ static void test_blake2s_with_guarded_out_buf(struct kunit *test)
rand_bytes(test_buf, data_len);
for (int out_len = 1; out_len <= BLAKE2S_HASH_SIZE; out_len++) {
u8 hash[BLAKE2S_HASH_SIZE];
u8 *guarded_hash = &test_buf[TEST_BUF_LEN - out_len];
- blake2s(hash, test_buf, NULL, out_len, data_len, 0);
- blake2s(guarded_hash, test_buf, NULL, out_len, data_len, 0);
+ blake2s(NULL, 0, test_buf, data_len, hash, out_len);
+ blake2s(NULL, 0, test_buf, data_len, guarded_hash, out_len);
KUNIT_ASSERT_MEMEQ(test, hash, guarded_hash, out_len);
}
}
static struct kunit_case blake2s_test_cases[] = {
--
2.51.1.dirty
^ permalink raw reply related [flat|nested] 20+ messages in thread* Re: [PATCH 01/10] lib/crypto: blake2s: Adjust parameter order of blake2s()
2025-10-18 4:30 ` [PATCH 01/10] lib/crypto: blake2s: Adjust parameter order of blake2s() Eric Biggers
@ 2025-10-19 14:36 ` Jason A. Donenfeld
2025-10-19 16:07 ` Eric Biggers
0 siblings, 1 reply; 20+ messages in thread
From: Jason A. Donenfeld @ 2025-10-19 14:36 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-crypto, linux-kernel, linux-btrfs, linux-arm-kernel,
Ard Biesheuvel
On Fri, Oct 17, 2025 at 09:30:57PM -0700, Eric Biggers wrote:
> Reorder the parameters of blake2s() from (out, in, key, outlen, inlen,
> keylen) to (key, keylen, in, inlen, out, outlen).
No objections to putting the size next to the argument. That makes
sense. But the order really should be:
out, outlen, in, inlen, key, keylen
in order to match normal APIs that output data. The output argument goes
first. The input argument goes next. Auxiliary information goes after.
^ permalink raw reply [flat|nested] 20+ messages in thread* Re: [PATCH 01/10] lib/crypto: blake2s: Adjust parameter order of blake2s()
2025-10-19 14:36 ` Jason A. Donenfeld
@ 2025-10-19 16:07 ` Eric Biggers
2025-10-20 17:44 ` Jason A. Donenfeld
0 siblings, 1 reply; 20+ messages in thread
From: Eric Biggers @ 2025-10-19 16:07 UTC (permalink / raw)
To: Jason A. Donenfeld
Cc: linux-crypto, linux-kernel, linux-btrfs, linux-arm-kernel,
Ard Biesheuvel
On Sun, Oct 19, 2025 at 04:36:36PM +0200, Jason A. Donenfeld wrote:
> On Fri, Oct 17, 2025 at 09:30:57PM -0700, Eric Biggers wrote:
> > Reorder the parameters of blake2s() from (out, in, key, outlen, inlen,
> > keylen) to (key, keylen, in, inlen, out, outlen).
>
> No objections to putting the size next to the argument. That makes
> sense. But the order really should be:
>
> out, outlen, in, inlen, key, keylen
>
> in order to match normal APIs that output data. The output argument goes
> first. The input argument goes next. Auxiliary information goes after.
In general, both conventions are common. But in the other hashing
functions in the kernel, we've been using output last. I'd like to
prioritize making it consistent with:
md5()
sha1()
sha224()
sha256()
sha384()
sha512()
hmac_md5()
hmac_sha1()
hmac_sha224()
hmac_sha256()
hmac_sha384()
hmac_sha512()
hmac_md5_usingrawkey()
hmac_sha1_usingrawkey()
hmac_sha224_usingrawkey()
hmac_sha256_usingrawkey()
hmac_sha384_usingrawkey()
hmac_sha512_usingrawkey()
crypto_shash_finup()
crypto_shash_digest()
crypto_shash_tfm_digest()
[and the SHA-3 functions in David's patchset]
- Eric
^ permalink raw reply [flat|nested] 20+ messages in thread* Re: [PATCH 01/10] lib/crypto: blake2s: Adjust parameter order of blake2s()
2025-10-19 16:07 ` Eric Biggers
@ 2025-10-20 17:44 ` Jason A. Donenfeld
0 siblings, 0 replies; 20+ messages in thread
From: Jason A. Donenfeld @ 2025-10-20 17:44 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-crypto, linux-kernel, linux-btrfs, linux-arm-kernel,
Ard Biesheuvel
Hi Eric,
On Sun, Oct 19, 2025 at 09:07:29AM -0700, Eric Biggers wrote:
> On Sun, Oct 19, 2025 at 04:36:36PM +0200, Jason A. Donenfeld wrote:
> > On Fri, Oct 17, 2025 at 09:30:57PM -0700, Eric Biggers wrote:
> > > Reorder the parameters of blake2s() from (out, in, key, outlen, inlen,
> > > keylen) to (key, keylen, in, inlen, out, outlen).
> >
> > No objections to putting the size next to the argument. That makes
> > sense. But the order really should be:
> >
> > out, outlen, in, inlen, key, keylen
> >
> > in order to match normal APIs that output data. The output argument goes
> > first. The input argument goes next. Auxiliary information goes after.
>
> In general, both conventions are common. But in the other hashing
> functions in the kernel, we've been using output last. I'd like to
> prioritize making it consistent with:
Hm. I don't like that. But I guess if that's what
every-single-other-hash-function-does, then blake2s should follow the
convention, to avoid churn of adding something new?
I went looking at C crypto libraries to see what generally the trend is,
and I saw that crypto_hash from nacl and libsodium and supercop do `out,
in`, as does cryptlib, but beyond that, most libraries don't provide an
all-in-one-interface but only have init/update/final. So however you see
fit, I guess; I don't want to hold up progress.
Jason
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 02/10] lib/crypto: blake2s: Rename blake2s_state to blake2s_ctx
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
2025-10-18 4:30 ` [PATCH 01/10] lib/crypto: blake2s: Adjust parameter order of blake2s() Eric Biggers
@ 2025-10-18 4:30 ` Eric Biggers
2025-10-18 4:30 ` [PATCH 03/10] lib/crypto: blake2s: Drop excessive const & rename block => data Eric Biggers
` (9 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Eric Biggers @ 2025-10-18 4:30 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld, Eric Biggers
For consistency with the SHA-1, SHA-2, SHA-3 (in development), and MD5
library APIs, rename blake2s_state to blake2s_ctx.
As a refresher, the ctx name:
- Is a bit shorter.
- Avoids confusion with the compression function state, which is also
often called the state (but is just part of the full context).
- Is consistent with OpenSSL.
Not a big deal, of course. But consistency is nice. With a BLAKE2b
library API about to be added, this is a convenient time to update this.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
drivers/char/random.c | 2 +-
drivers/net/wireguard/cookie.c | 14 ++++----
drivers/net/wireguard/noise.c | 28 +++++++--------
include/crypto/blake2s.h | 59 ++++++++++++++++----------------
lib/crypto/arm/blake2s-core.S | 10 +++---
lib/crypto/arm/blake2s.h | 4 +--
lib/crypto/blake2s.c | 58 +++++++++++++++----------------
lib/crypto/tests/blake2s_kunit.c | 23 ++++++-------
lib/crypto/x86/blake2s.h | 12 +++----
9 files changed, 104 insertions(+), 106 deletions(-)
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 422c5c76571b9..7e0486d8c51de 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -634,11 +634,11 @@ enum {
POOL_READY_BITS = POOL_BITS, /* When crng_init->CRNG_READY */
POOL_EARLY_BITS = POOL_READY_BITS / 2 /* When crng_init->CRNG_EARLY */
};
static struct {
- struct blake2s_state hash;
+ struct blake2s_ctx hash;
spinlock_t lock;
unsigned int init_bits;
} input_pool = {
.hash.h = { BLAKE2S_IV0 ^ (0x01010000 | BLAKE2S_HASH_SIZE),
BLAKE2S_IV1, BLAKE2S_IV2, BLAKE2S_IV3, BLAKE2S_IV4,
diff --git a/drivers/net/wireguard/cookie.c b/drivers/net/wireguard/cookie.c
index be1b83aae03bf..08731b3fa32b7 100644
--- a/drivers/net/wireguard/cookie.c
+++ b/drivers/net/wireguard/cookie.c
@@ -31,11 +31,11 @@ static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] __nonstring = "cookie--";
static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN],
const u8 pubkey[NOISE_PUBLIC_KEY_LEN],
const u8 label[COOKIE_KEY_LABEL_LEN])
{
- struct blake2s_state blake;
+ struct blake2s_ctx blake;
blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN);
blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN);
blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN);
blake2s_final(&blake, key);
@@ -89,11 +89,11 @@ static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len,
}
static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb,
struct cookie_checker *checker)
{
- struct blake2s_state state;
+ struct blake2s_ctx blake;
if (wg_birthdate_has_expired(checker->secret_birthdate,
COOKIE_SECRET_MAX_AGE)) {
down_write(&checker->secret_lock);
checker->secret_birthdate = ktime_get_coarse_boottime_ns();
@@ -101,19 +101,19 @@ static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb,
up_write(&checker->secret_lock);
}
down_read(&checker->secret_lock);
- blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN);
+ blake2s_init_key(&blake, COOKIE_LEN, checker->secret, NOISE_HASH_LEN);
if (skb->protocol == htons(ETH_P_IP))
- blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr,
+ blake2s_update(&blake, (u8 *)&ip_hdr(skb)->saddr,
sizeof(struct in_addr));
else if (skb->protocol == htons(ETH_P_IPV6))
- blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr,
+ blake2s_update(&blake, (u8 *)&ipv6_hdr(skb)->saddr,
sizeof(struct in6_addr));
- blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16));
- blake2s_final(&state, cookie);
+ blake2s_update(&blake, (u8 *)&udp_hdr(skb)->source, sizeof(__be16));
+ blake2s_final(&blake, cookie);
up_read(&checker->secret_lock);
}
enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
index 306abb876c805..1fe8468f0bef3 100644
--- a/drivers/net/wireguard/noise.c
+++ b/drivers/net/wireguard/noise.c
@@ -31,11 +31,11 @@ static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init;
static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init;
static atomic64_t keypair_counter = ATOMIC64_INIT(0);
void __init wg_noise_init(void)
{
- struct blake2s_state blake;
+ struct blake2s_ctx blake;
blake2s(NULL, 0, handshake_name, sizeof(handshake_name),
handshake_init_chaining_key, NOISE_HASH_LEN);
blake2s_init(&blake, NOISE_HASH_LEN);
blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN);
@@ -302,37 +302,37 @@ void wg_noise_set_static_identity_private_key(
static_identity->static_public, private_key);
}
static void hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen, const size_t keylen)
{
- struct blake2s_state state;
+ struct blake2s_ctx blake;
u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
int i;
if (keylen > BLAKE2S_BLOCK_SIZE) {
- blake2s_init(&state, BLAKE2S_HASH_SIZE);
- blake2s_update(&state, key, keylen);
- blake2s_final(&state, x_key);
+ blake2s_init(&blake, BLAKE2S_HASH_SIZE);
+ blake2s_update(&blake, key, keylen);
+ blake2s_final(&blake, x_key);
} else
memcpy(x_key, key, keylen);
for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
x_key[i] ^= 0x36;
- blake2s_init(&state, BLAKE2S_HASH_SIZE);
- blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
- blake2s_update(&state, in, inlen);
- blake2s_final(&state, i_hash);
+ blake2s_init(&blake, BLAKE2S_HASH_SIZE);
+ blake2s_update(&blake, x_key, BLAKE2S_BLOCK_SIZE);
+ blake2s_update(&blake, in, inlen);
+ blake2s_final(&blake, i_hash);
for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
x_key[i] ^= 0x5c ^ 0x36;
- blake2s_init(&state, BLAKE2S_HASH_SIZE);
- blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
- blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
- blake2s_final(&state, i_hash);
+ blake2s_init(&blake, BLAKE2S_HASH_SIZE);
+ blake2s_update(&blake, x_key, BLAKE2S_BLOCK_SIZE);
+ blake2s_update(&blake, i_hash, BLAKE2S_HASH_SIZE);
+ blake2s_final(&blake, i_hash);
memcpy(out, i_hash, BLAKE2S_HASH_SIZE);
memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE);
memzero_explicit(i_hash, BLAKE2S_HASH_SIZE);
}
@@ -429,11 +429,11 @@ static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN],
return true;
}
static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
{
- struct blake2s_state blake;
+ struct blake2s_ctx blake;
blake2s_init(&blake, NOISE_HASH_LEN);
blake2s_update(&blake, hash, NOISE_HASH_LEN);
blake2s_update(&blake, src, src_len);
blake2s_final(&blake, hash);
diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
index a7dd678725b27..4c8d532ee97b3 100644
--- a/include/crypto/blake2s.h
+++ b/include/crypto/blake2s.h
@@ -20,11 +20,11 @@ enum blake2s_lengths {
BLAKE2S_160_HASH_SIZE = 20,
BLAKE2S_224_HASH_SIZE = 28,
BLAKE2S_256_HASH_SIZE = 32,
};
-struct blake2s_state {
+struct blake2s_ctx {
/* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */
u32 h[8];
u32 t[2];
u32 f[2];
u8 buf[BLAKE2S_BLOCK_SIZE];
@@ -41,64 +41,63 @@ enum blake2s_iv {
BLAKE2S_IV5 = 0x9B05688CUL,
BLAKE2S_IV6 = 0x1F83D9ABUL,
BLAKE2S_IV7 = 0x5BE0CD19UL,
};
-static inline void __blake2s_init(struct blake2s_state *state, size_t outlen,
+static inline void __blake2s_init(struct blake2s_ctx *ctx, size_t outlen,
const void *key, size_t keylen)
{
- state->h[0] = BLAKE2S_IV0 ^ (0x01010000 | keylen << 8 | outlen);
- state->h[1] = BLAKE2S_IV1;
- state->h[2] = BLAKE2S_IV2;
- state->h[3] = BLAKE2S_IV3;
- state->h[4] = BLAKE2S_IV4;
- state->h[5] = BLAKE2S_IV5;
- state->h[6] = BLAKE2S_IV6;
- state->h[7] = BLAKE2S_IV7;
- state->t[0] = 0;
- state->t[1] = 0;
- state->f[0] = 0;
- state->f[1] = 0;
- state->buflen = 0;
- state->outlen = outlen;
+ ctx->h[0] = BLAKE2S_IV0 ^ (0x01010000 | keylen << 8 | outlen);
+ ctx->h[1] = BLAKE2S_IV1;
+ ctx->h[2] = BLAKE2S_IV2;
+ ctx->h[3] = BLAKE2S_IV3;
+ ctx->h[4] = BLAKE2S_IV4;
+ ctx->h[5] = BLAKE2S_IV5;
+ ctx->h[6] = BLAKE2S_IV6;
+ ctx->h[7] = BLAKE2S_IV7;
+ ctx->t[0] = 0;
+ ctx->t[1] = 0;
+ ctx->f[0] = 0;
+ ctx->f[1] = 0;
+ ctx->buflen = 0;
+ ctx->outlen = outlen;
if (keylen) {
- memcpy(state->buf, key, keylen);
- memset(&state->buf[keylen], 0, BLAKE2S_BLOCK_SIZE - keylen);
- state->buflen = BLAKE2S_BLOCK_SIZE;
+ memcpy(ctx->buf, key, keylen);
+ memset(&ctx->buf[keylen], 0, BLAKE2S_BLOCK_SIZE - keylen);
+ ctx->buflen = BLAKE2S_BLOCK_SIZE;
}
}
-static inline void blake2s_init(struct blake2s_state *state,
- const size_t outlen)
+static inline void blake2s_init(struct blake2s_ctx *ctx, const size_t outlen)
{
- __blake2s_init(state, outlen, NULL, 0);
+ __blake2s_init(ctx, outlen, NULL, 0);
}
-static inline void blake2s_init_key(struct blake2s_state *state,
+static inline void blake2s_init_key(struct blake2s_ctx *ctx,
const size_t outlen, const void *key,
const size_t keylen)
{
WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE ||
!key || !keylen || keylen > BLAKE2S_KEY_SIZE));
- __blake2s_init(state, outlen, key, keylen);
+ __blake2s_init(ctx, outlen, key, keylen);
}
-void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen);
-void blake2s_final(struct blake2s_state *state, u8 *out);
+void blake2s_update(struct blake2s_ctx *ctx, const u8 *in, size_t inlen);
+void blake2s_final(struct blake2s_ctx *ctx, u8 *out);
static inline void blake2s(const u8 *key, const size_t keylen,
const u8 *in, const size_t inlen,
u8 *out, const size_t outlen)
{
- struct blake2s_state state;
+ struct blake2s_ctx ctx;
WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
(!key && keylen)));
- __blake2s_init(&state, outlen, key, keylen);
- blake2s_update(&state, in, inlen);
- blake2s_final(&state, out);
+ __blake2s_init(&ctx, outlen, key, keylen);
+ blake2s_update(&ctx, in, inlen);
+ blake2s_final(&ctx, out);
}
#endif /* _CRYPTO_BLAKE2S_H */
diff --git a/lib/crypto/arm/blake2s-core.S b/lib/crypto/arm/blake2s-core.S
index 293f44fa8f316..78e758a7cb3e2 100644
--- a/lib/crypto/arm/blake2s-core.S
+++ b/lib/crypto/arm/blake2s-core.S
@@ -168,24 +168,24 @@
\s12, \s13, \s14, \s15
__strd r10, r11, sp, 20
.endm
//
-// void blake2s_compress(struct blake2s_state *state,
+// void blake2s_compress(struct blake2s_ctx *ctx,
// const u8 *block, size_t nblocks, u32 inc);
//
-// Only the first three fields of struct blake2s_state are used:
+// Only the first three fields of struct blake2s_ctx are used:
// u32 h[8]; (inout)
// u32 t[2]; (inout)
// u32 f[2]; (in)
//
.align 5
ENTRY(blake2s_compress)
push {r0-r2,r4-r11,lr} // keep this an even number
.Lnext_block:
- // r0 is 'state'
+ // r0 is 'ctx'
// r1 is 'block'
// r3 is 'inc'
// Load and increment the counter t[0..1].
__ldrd r10, r11, r0, 32
@@ -209,11 +209,11 @@ ENTRY(blake2s_compress)
.Lcopy_block_done:
str r1, [sp, #68] // Update message pointer
// Calculate v[8..15]. Push v[9..15] onto the stack, and leave space
// for spilling v[8..9]. Leave v[8..9] in r8-r9.
- mov r14, r0 // r14 = state
+ mov r14, r0 // r14 = ctx
adr r12, .Lblake2s_IV
ldmia r12!, {r8-r9} // load IV[0..1]
__ldrd r0, r1, r14, 40 // load f[0..1]
ldm r12, {r2-r7} // load IV[3..7]
eor r4, r4, r10 // v[12] = IV[4] ^ t[0]
@@ -273,11 +273,11 @@ ENTRY(blake2s_compress)
stm r14, {r0-r3} // store new h[4..7]
// Advance to the next block, if there is one. Note that if there are
// multiple blocks, then 'inc' (the counter increment amount) must be
// 64. So we can simply set it to 64 without re-loading it.
- ldm sp, {r0, r1, r2} // load (state, block, nblocks)
+ ldm sp, {r0, r1, r2} // load (ctx, block, nblocks)
mov r3, #64 // set 'inc'
subs r2, r2, #1 // nblocks--
str r2, [sp, #8]
bne .Lnext_block // nblocks != 0?
diff --git a/lib/crypto/arm/blake2s.h b/lib/crypto/arm/blake2s.h
index aa7a97139ea74..ce009cd98de90 100644
--- a/lib/crypto/arm/blake2s.h
+++ b/lib/crypto/arm/blake2s.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* defined in blake2s-core.S */
-void blake2s_compress(struct blake2s_state *state, const u8 *block,
- size_t nblocks, u32 inc);
+void blake2s_compress(struct blake2s_ctx *ctx,
+ const u8 *block, size_t nblocks, u32 inc);
diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
index 5638ed9d882d8..1ad36cb29835f 100644
--- a/lib/crypto/blake2s.c
+++ b/lib/crypto/blake2s.c
@@ -27,41 +27,41 @@ static const u8 blake2s_sigma[10][16] = {
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
};
-static inline void blake2s_increment_counter(struct blake2s_state *state,
+static inline void blake2s_increment_counter(struct blake2s_ctx *ctx,
const u32 inc)
{
- state->t[0] += inc;
- state->t[1] += (state->t[0] < inc);
+ ctx->t[0] += inc;
+ ctx->t[1] += (ctx->t[0] < inc);
}
static void __maybe_unused
-blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
+blake2s_compress_generic(struct blake2s_ctx *ctx, const u8 *block,
size_t nblocks, const u32 inc)
{
u32 m[16];
u32 v[16];
int i;
WARN_ON(IS_ENABLED(DEBUG) &&
(nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE));
while (nblocks > 0) {
- blake2s_increment_counter(state, inc);
+ blake2s_increment_counter(ctx, inc);
memcpy(m, block, BLAKE2S_BLOCK_SIZE);
le32_to_cpu_array(m, ARRAY_SIZE(m));
- memcpy(v, state->h, 32);
+ memcpy(v, ctx->h, 32);
v[ 8] = BLAKE2S_IV0;
v[ 9] = BLAKE2S_IV1;
v[10] = BLAKE2S_IV2;
v[11] = BLAKE2S_IV3;
- v[12] = BLAKE2S_IV4 ^ state->t[0];
- v[13] = BLAKE2S_IV5 ^ state->t[1];
- v[14] = BLAKE2S_IV6 ^ state->f[0];
- v[15] = BLAKE2S_IV7 ^ state->f[1];
+ v[12] = BLAKE2S_IV4 ^ ctx->t[0];
+ v[13] = BLAKE2S_IV5 ^ ctx->t[1];
+ v[14] = BLAKE2S_IV6 ^ ctx->f[0];
+ v[15] = BLAKE2S_IV7 ^ ctx->f[1];
#define G(r, i, a, b, c, d) do { \
a += b + m[blake2s_sigma[r][2 * i + 0]]; \
d = ror32(d ^ a, 16); \
c += d; \
@@ -95,11 +95,11 @@ blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
#undef G
#undef ROUND
for (i = 0; i < 8; ++i)
- state->h[i] ^= v[i] ^ v[i + 8];
+ ctx->h[i] ^= v[i] ^ v[i + 8];
block += BLAKE2S_BLOCK_SIZE;
--nblocks;
}
}
@@ -108,49 +108,49 @@ blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
#include "blake2s.h" /* $(SRCARCH)/blake2s.h */
#else
#define blake2s_compress blake2s_compress_generic
#endif
-static inline void blake2s_set_lastblock(struct blake2s_state *state)
+static inline void blake2s_set_lastblock(struct blake2s_ctx *ctx)
{
- state->f[0] = -1;
+ ctx->f[0] = -1;
}
-void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
+void blake2s_update(struct blake2s_ctx *ctx, const u8 *in, size_t inlen)
{
- const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+ const size_t fill = BLAKE2S_BLOCK_SIZE - ctx->buflen;
if (unlikely(!inlen))
return;
if (inlen > fill) {
- memcpy(state->buf + state->buflen, in, fill);
- blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
- state->buflen = 0;
+ memcpy(ctx->buf + ctx->buflen, in, fill);
+ blake2s_compress(ctx, ctx->buf, 1, BLAKE2S_BLOCK_SIZE);
+ ctx->buflen = 0;
in += fill;
inlen -= fill;
}
if (inlen > BLAKE2S_BLOCK_SIZE) {
const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
- blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+ blake2s_compress(ctx, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
}
- memcpy(state->buf + state->buflen, in, inlen);
- state->buflen += inlen;
+ memcpy(ctx->buf + ctx->buflen, in, inlen);
+ ctx->buflen += inlen;
}
EXPORT_SYMBOL(blake2s_update);
-void blake2s_final(struct blake2s_state *state, u8 *out)
+void blake2s_final(struct blake2s_ctx *ctx, u8 *out)
{
WARN_ON(IS_ENABLED(DEBUG) && !out);
- blake2s_set_lastblock(state);
- memset(state->buf + state->buflen, 0,
- BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
- blake2s_compress(state, state->buf, 1, state->buflen);
- cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
- memcpy(out, state->h, state->outlen);
- memzero_explicit(state, sizeof(*state));
+ blake2s_set_lastblock(ctx);
+ memset(ctx->buf + ctx->buflen, 0,
+ BLAKE2S_BLOCK_SIZE - ctx->buflen); /* Padding */
+ blake2s_compress(ctx, ctx->buf, 1, ctx->buflen);
+ cpu_to_le32_array(ctx->h, ARRAY_SIZE(ctx->h));
+ memcpy(out, ctx->h, ctx->outlen);
+ memzero_explicit(ctx, sizeof(*ctx));
}
EXPORT_SYMBOL(blake2s_final);
#ifdef blake2s_mod_init_arch
static int __init blake2s_mod_init(void)
diff --git a/lib/crypto/tests/blake2s_kunit.c b/lib/crypto/tests/blake2s_kunit.c
index 247bbdf7dc864..6832d9aa7b82d 100644
--- a/lib/crypto/tests/blake2s_kunit.c
+++ b/lib/crypto/tests/blake2s_kunit.c
@@ -15,21 +15,21 @@ static void blake2s_default(const u8 *data, size_t len,
u8 out[BLAKE2S_HASH_SIZE])
{
blake2s(NULL, 0, data, len, out, BLAKE2S_HASH_SIZE);
}
-static void blake2s_init_default(struct blake2s_state *state)
+static void blake2s_init_default(struct blake2s_ctx *ctx)
{
- blake2s_init(state, BLAKE2S_HASH_SIZE);
+ blake2s_init(ctx, BLAKE2S_HASH_SIZE);
}
/*
* Generate the HASH_KUNIT_CASES using hash-test-template.h. These test BLAKE2s
* with a key length of 0 and a hash length of BLAKE2S_HASH_SIZE.
*/
#define HASH blake2s_default
-#define HASH_CTX blake2s_state
+#define HASH_CTX blake2s_ctx
#define HASH_SIZE BLAKE2S_HASH_SIZE
#define HASH_INIT blake2s_init_default
#define HASH_UPDATE blake2s_update
#define HASH_FINAL blake2s_final
#include "hash-test-template.h"
@@ -42,23 +42,23 @@ static void test_blake2s_all_key_and_hash_lens(struct kunit *test)
{
const size_t data_len = 100;
u8 *data = &test_buf[0];
u8 *key = data + data_len;
u8 *hash = key + BLAKE2S_KEY_SIZE;
- struct blake2s_state main_state;
+ struct blake2s_ctx main_ctx;
u8 main_hash[BLAKE2S_HASH_SIZE];
rand_bytes_seeded_from_len(data, data_len);
- blake2s_init(&main_state, BLAKE2S_HASH_SIZE);
+ blake2s_init(&main_ctx, BLAKE2S_HASH_SIZE);
for (int key_len = 0; key_len <= BLAKE2S_KEY_SIZE; key_len++) {
rand_bytes_seeded_from_len(key, key_len);
for (int out_len = 1; out_len <= BLAKE2S_HASH_SIZE; out_len++) {
blake2s(key, key_len, data, data_len, hash, out_len);
- blake2s_update(&main_state, hash, out_len);
+ blake2s_update(&main_ctx, hash, out_len);
}
}
- blake2s_final(&main_state, main_hash);
+ blake2s_final(&main_ctx, main_hash);
KUNIT_ASSERT_MEMEQ(test, main_hash, blake2s_keyed_testvec_consolidated,
BLAKE2S_HASH_SIZE);
}
/*
@@ -73,25 +73,24 @@ static void test_blake2s_with_guarded_key_buf(struct kunit *test)
for (int key_len = 0; key_len <= BLAKE2S_KEY_SIZE; key_len++) {
u8 key[BLAKE2S_KEY_SIZE];
u8 *guarded_key = &test_buf[TEST_BUF_LEN - key_len];
u8 hash1[BLAKE2S_HASH_SIZE];
u8 hash2[BLAKE2S_HASH_SIZE];
- struct blake2s_state state;
+ struct blake2s_ctx ctx;
rand_bytes(key, key_len);
memcpy(guarded_key, key, key_len);
blake2s(key, key_len, test_buf, data_len,
hash1, BLAKE2S_HASH_SIZE);
blake2s(guarded_key, key_len, test_buf, data_len,
hash2, BLAKE2S_HASH_SIZE);
KUNIT_ASSERT_MEMEQ(test, hash1, hash2, BLAKE2S_HASH_SIZE);
- blake2s_init_key(&state, BLAKE2S_HASH_SIZE,
- guarded_key, key_len);
- blake2s_update(&state, test_buf, data_len);
- blake2s_final(&state, hash2);
+ blake2s_init_key(&ctx, BLAKE2S_HASH_SIZE, guarded_key, key_len);
+ blake2s_update(&ctx, test_buf, data_len);
+ blake2s_final(&ctx, hash2);
KUNIT_ASSERT_MEMEQ(test, hash1, hash2, BLAKE2S_HASH_SIZE);
}
}
/*
diff --git a/lib/crypto/x86/blake2s.h b/lib/crypto/x86/blake2s.h
index b6d30d2fa045e..de360935b8204 100644
--- a/lib/crypto/x86/blake2s.h
+++ b/lib/crypto/x86/blake2s.h
@@ -9,40 +9,40 @@
#include <asm/simd.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
-asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
+asmlinkage void blake2s_compress_ssse3(struct blake2s_ctx *ctx,
const u8 *block, const size_t nblocks,
const u32 inc);
-asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
+asmlinkage void blake2s_compress_avx512(struct blake2s_ctx *ctx,
const u8 *block, const size_t nblocks,
const u32 inc);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
-static void blake2s_compress(struct blake2s_state *state, const u8 *block,
+static void blake2s_compress(struct blake2s_ctx *ctx, const u8 *block,
size_t nblocks, const u32 inc)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
- blake2s_compress_generic(state, block, nblocks, inc);
+ blake2s_compress_generic(ctx, block, nblocks, inc);
return;
}
do {
const size_t blocks = min_t(size_t, nblocks,
SZ_4K / BLAKE2S_BLOCK_SIZE);
kernel_fpu_begin();
if (static_branch_likely(&blake2s_use_avx512))
- blake2s_compress_avx512(state, block, blocks, inc);
+ blake2s_compress_avx512(ctx, block, blocks, inc);
else
- blake2s_compress_ssse3(state, block, blocks, inc);
+ blake2s_compress_ssse3(ctx, block, blocks, inc);
kernel_fpu_end();
nblocks -= blocks;
block += blocks * BLAKE2S_BLOCK_SIZE;
} while (nblocks);
--
2.51.1.dirty
^ permalink raw reply related [flat|nested] 20+ messages in thread

* [PATCH 03/10] lib/crypto: blake2s: Drop excessive const & rename block => data
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
2025-10-18 4:30 ` [PATCH 01/10] lib/crypto: blake2s: Adjust parameter order of blake2s() Eric Biggers
2025-10-18 4:30 ` [PATCH 02/10] lib/crypto: blake2s: Rename blake2s_state to blake2s_ctx Eric Biggers
@ 2025-10-18 4:30 ` Eric Biggers
2025-10-18 4:31 ` [PATCH 04/10] lib/crypto: blake2s: Document the BLAKE2s library API Eric Biggers
` (8 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Eric Biggers @ 2025-10-18 4:30 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld, Eric Biggers
A couple more small cleanups to the BLAKE2s code before these things get
propagated into the BLAKE2b code:
- Drop 'const' from some non-pointer function parameters. It was a bit
excessive and not conventional.
- Rename 'block' argument of blake2s_compress*() to 'data'. This is for
consistency with the SHA-* code, and also to avoid the implication
that it points to a singular "block".
No functional changes.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
include/crypto/blake2s.h | 13 ++++++-------
lib/crypto/arm/blake2s-core.S | 6 +++---
lib/crypto/arm/blake2s.h | 2 +-
lib/crypto/blake2s.c | 12 ++++++------
lib/crypto/x86/blake2s.h | 18 ++++++++----------
5 files changed, 24 insertions(+), 27 deletions(-)
diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
index 4c8d532ee97b3..33893057eb414 100644
--- a/include/crypto/blake2s.h
+++ b/include/crypto/blake2s.h
@@ -65,31 +65,30 @@ static inline void __blake2s_init(struct blake2s_ctx *ctx, size_t outlen,
memset(&ctx->buf[keylen], 0, BLAKE2S_BLOCK_SIZE - keylen);
ctx->buflen = BLAKE2S_BLOCK_SIZE;
}
}
-static inline void blake2s_init(struct blake2s_ctx *ctx, const size_t outlen)
+static inline void blake2s_init(struct blake2s_ctx *ctx, size_t outlen)
{
__blake2s_init(ctx, outlen, NULL, 0);
}
-static inline void blake2s_init_key(struct blake2s_ctx *ctx,
- const size_t outlen, const void *key,
- const size_t keylen)
+static inline void blake2s_init_key(struct blake2s_ctx *ctx, size_t outlen,
+ const void *key, size_t keylen)
{
WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE ||
!key || !keylen || keylen > BLAKE2S_KEY_SIZE));
__blake2s_init(ctx, outlen, key, keylen);
}
void blake2s_update(struct blake2s_ctx *ctx, const u8 *in, size_t inlen);
void blake2s_final(struct blake2s_ctx *ctx, u8 *out);
-static inline void blake2s(const u8 *key, const size_t keylen,
- const u8 *in, const size_t inlen,
- u8 *out, const size_t outlen)
+static inline void blake2s(const u8 *key, size_t keylen,
+ const u8 *in, size_t inlen,
+ u8 *out, size_t outlen)
{
struct blake2s_ctx ctx;
WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
diff --git a/lib/crypto/arm/blake2s-core.S b/lib/crypto/arm/blake2s-core.S
index 78e758a7cb3e2..14eb7c18a8365 100644
--- a/lib/crypto/arm/blake2s-core.S
+++ b/lib/crypto/arm/blake2s-core.S
@@ -169,11 +169,11 @@
__strd r10, r11, sp, 20
.endm
//
// void blake2s_compress(struct blake2s_ctx *ctx,
-// const u8 *block, size_t nblocks, u32 inc);
+// const u8 *data, size_t nblocks, u32 inc);
//
// Only the first three fields of struct blake2s_ctx are used:
// u32 h[8]; (inout)
// u32 t[2]; (inout)
// u32 f[2]; (in)
@@ -182,11 +182,11 @@
ENTRY(blake2s_compress)
push {r0-r2,r4-r11,lr} // keep this an even number
.Lnext_block:
// r0 is 'ctx'
- // r1 is 'block'
+ // r1 is 'data'
// r3 is 'inc'
// Load and increment the counter t[0..1].
__ldrd r10, r11, r0, 32
adds r10, r10, r3
@@ -273,11 +273,11 @@ ENTRY(blake2s_compress)
stm r14, {r0-r3} // store new h[4..7]
// Advance to the next block, if there is one. Note that if there are
// multiple blocks, then 'inc' (the counter increment amount) must be
// 64. So we can simply set it to 64 without re-loading it.
- ldm sp, {r0, r1, r2} // load (ctx, block, nblocks)
+ ldm sp, {r0, r1, r2} // load (ctx, data, nblocks)
mov r3, #64 // set 'inc'
subs r2, r2, #1 // nblocks--
str r2, [sp, #8]
bne .Lnext_block // nblocks != 0?
diff --git a/lib/crypto/arm/blake2s.h b/lib/crypto/arm/blake2s.h
index ce009cd98de90..42c04440c1913 100644
--- a/lib/crypto/arm/blake2s.h
+++ b/lib/crypto/arm/blake2s.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* defined in blake2s-core.S */
void blake2s_compress(struct blake2s_ctx *ctx,
- const u8 *block, size_t nblocks, u32 inc);
+ const u8 *data, size_t nblocks, u32 inc);
diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
index 1ad36cb29835f..6182c21ed943d 100644
--- a/lib/crypto/blake2s.c
+++ b/lib/crypto/blake2s.c
@@ -27,31 +27,30 @@ static const u8 blake2s_sigma[10][16] = {
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
};
-static inline void blake2s_increment_counter(struct blake2s_ctx *ctx,
- const u32 inc)
+static inline void blake2s_increment_counter(struct blake2s_ctx *ctx, u32 inc)
{
ctx->t[0] += inc;
ctx->t[1] += (ctx->t[0] < inc);
}
static void __maybe_unused
-blake2s_compress_generic(struct blake2s_ctx *ctx, const u8 *block,
- size_t nblocks, const u32 inc)
+blake2s_compress_generic(struct blake2s_ctx *ctx,
+ const u8 *data, size_t nblocks, u32 inc)
{
u32 m[16];
u32 v[16];
int i;
WARN_ON(IS_ENABLED(DEBUG) &&
(nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE));
while (nblocks > 0) {
blake2s_increment_counter(ctx, inc);
- memcpy(m, block, BLAKE2S_BLOCK_SIZE);
+ memcpy(m, data, BLAKE2S_BLOCK_SIZE);
le32_to_cpu_array(m, ARRAY_SIZE(m));
memcpy(v, ctx->h, 32);
v[ 8] = BLAKE2S_IV0;
v[ 9] = BLAKE2S_IV1;
v[10] = BLAKE2S_IV2;
@@ -97,11 +96,11 @@ blake2s_compress_generic(struct blake2s_ctx *ctx, const u8 *block,
#undef ROUND
for (i = 0; i < 8; ++i)
ctx->h[i] ^= v[i] ^ v[i + 8];
- block += BLAKE2S_BLOCK_SIZE;
+ data += BLAKE2S_BLOCK_SIZE;
--nblocks;
}
}
#ifdef CONFIG_CRYPTO_LIB_BLAKE2S_ARCH
@@ -128,10 +127,11 @@ void blake2s_update(struct blake2s_ctx *ctx, const u8 *in, size_t inlen)
in += fill;
inlen -= fill;
}
if (inlen > BLAKE2S_BLOCK_SIZE) {
const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+
blake2s_compress(ctx, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
}
memcpy(ctx->buf + ctx->buflen, in, inlen);
diff --git a/lib/crypto/x86/blake2s.h b/lib/crypto/x86/blake2s.h
index de360935b8204..f8eed6cb042e4 100644
--- a/lib/crypto/x86/blake2s.h
+++ b/lib/crypto/x86/blake2s.h
@@ -10,43 +10,41 @@
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/sizes.h>
asmlinkage void blake2s_compress_ssse3(struct blake2s_ctx *ctx,
- const u8 *block, const size_t nblocks,
- const u32 inc);
+ const u8 *data, size_t nblocks, u32 inc);
asmlinkage void blake2s_compress_avx512(struct blake2s_ctx *ctx,
- const u8 *block, const size_t nblocks,
- const u32 inc);
+ const u8 *data, size_t nblocks, u32 inc);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
-static void blake2s_compress(struct blake2s_ctx *ctx, const u8 *block,
- size_t nblocks, const u32 inc)
+static void blake2s_compress(struct blake2s_ctx *ctx,
+ const u8 *data, size_t nblocks, u32 inc)
{
/* SIMD disables preemption, so relax after processing each page. */
BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
- blake2s_compress_generic(ctx, block, nblocks, inc);
+ blake2s_compress_generic(ctx, data, nblocks, inc);
return;
}
do {
const size_t blocks = min_t(size_t, nblocks,
SZ_4K / BLAKE2S_BLOCK_SIZE);
kernel_fpu_begin();
if (static_branch_likely(&blake2s_use_avx512))
- blake2s_compress_avx512(ctx, block, blocks, inc);
+ blake2s_compress_avx512(ctx, data, blocks, inc);
else
- blake2s_compress_ssse3(ctx, block, blocks, inc);
+ blake2s_compress_ssse3(ctx, data, blocks, inc);
kernel_fpu_end();
+ data += blocks * BLAKE2S_BLOCK_SIZE;
nblocks -= blocks;
- block += blocks * BLAKE2S_BLOCK_SIZE;
} while (nblocks);
}
#define blake2s_mod_init_arch blake2s_mod_init_arch
static void blake2s_mod_init_arch(void)
--
2.51.1.dirty
^ permalink raw reply related [flat|nested] 20+ messages in thread

* [PATCH 04/10] lib/crypto: blake2s: Document the BLAKE2s library API
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
` (2 preceding siblings ...)
2025-10-18 4:30 ` [PATCH 03/10] lib/crypto: blake2s: Drop excessive const & rename block => data Eric Biggers
@ 2025-10-18 4:31 ` Eric Biggers
2025-10-18 4:31 ` [PATCH 05/10] byteorder: Add le64_to_cpu_array() and cpu_to_le64_array() Eric Biggers
` (7 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Eric Biggers @ 2025-10-18 4:31 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld, Eric Biggers
Add kerneldoc for the BLAKE2s library API.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
include/crypto/blake2s.h | 58 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 58 insertions(+)
diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
index 33893057eb414..648cb78243588 100644
--- a/include/crypto/blake2s.h
+++ b/include/crypto/blake2s.h
@@ -20,10 +20,19 @@ enum blake2s_lengths {
BLAKE2S_160_HASH_SIZE = 20,
BLAKE2S_224_HASH_SIZE = 28,
BLAKE2S_256_HASH_SIZE = 32,
};
+/**
+ * struct blake2s_ctx - Context for hashing a message with BLAKE2s
+ * @h: compression function state
+ * @t: block counter
+ * @f: finalization indicator
+ * @buf: partial block buffer; 'buflen' bytes are valid
+ * @buflen: number of bytes buffered in @buf
+ * @outlen: length of output hash value in bytes, at most BLAKE2S_HASH_SIZE
+ */
struct blake2s_ctx {
/* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */
u32 h[8];
u32 t[2];
u32 f[2];
@@ -65,27 +74,76 @@ static inline void __blake2s_init(struct blake2s_ctx *ctx, size_t outlen,
memset(&ctx->buf[keylen], 0, BLAKE2S_BLOCK_SIZE - keylen);
ctx->buflen = BLAKE2S_BLOCK_SIZE;
}
}
+/**
+ * blake2s_init() - Initialize a BLAKE2s context for a new message (unkeyed)
+ * @ctx: the context to initialize
+ * @outlen: length of output hash value in bytes, at most BLAKE2S_HASH_SIZE
+ *
+ * Context: Any context.
+ */
static inline void blake2s_init(struct blake2s_ctx *ctx, size_t outlen)
{
__blake2s_init(ctx, outlen, NULL, 0);
}
+/**
+ * blake2s_init_key() - Initialize a BLAKE2s context for a new message (keyed)
+ * @ctx: the context to initialize
+ * @outlen: length of output hash value in bytes, at most BLAKE2S_HASH_SIZE
+ * @key: the key
+ * @keylen: the key length in bytes, at most BLAKE2S_KEY_SIZE
+ *
+ * Context: Any context.
+ */
static inline void blake2s_init_key(struct blake2s_ctx *ctx, size_t outlen,
const void *key, size_t keylen)
{
WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE ||
!key || !keylen || keylen > BLAKE2S_KEY_SIZE));
__blake2s_init(ctx, outlen, key, keylen);
}
+/**
+ * blake2s_update() - Update a BLAKE2s context with message data
+ * @ctx: the context to update; must have been initialized
+ * @in: the message data
+ * @inlen: the data length in bytes
+ *
+ * This can be called any number of times.
+ *
+ * Context: Any context.
+ */
void blake2s_update(struct blake2s_ctx *ctx, const u8 *in, size_t inlen);
+
+/**
+ * blake2s_final() - Finish computing a BLAKE2s hash
+ * @ctx: the context to finalize; must have been initialized
+ * @out: (output) the resulting BLAKE2s hash. Its length will be equal to the
+ * @outlen that was passed to blake2s_init() or blake2s_init_key().
+ *
+ * After finishing, this zeroizes @ctx. So the caller does not need to do it.
+ *
+ * Context: Any context.
+ */
void blake2s_final(struct blake2s_ctx *ctx, u8 *out);
+/**
+ * blake2s() - Compute BLAKE2s hash in one shot
+ * @key: the key, or NULL for an unkeyed hash
+ * @keylen: the key length in bytes (at most BLAKE2S_KEY_SIZE), or 0 for an
+ * unkeyed hash
+ * @in: the message data
+ * @inlen: the data length in bytes
+ * @out: (output) the resulting BLAKE2s hash, with length @outlen
+ * @outlen: length of output hash value in bytes, at most BLAKE2S_HASH_SIZE
+ *
+ * Context: Any context.
+ */
static inline void blake2s(const u8 *key, size_t keylen,
const u8 *in, size_t inlen,
u8 *out, size_t outlen)
{
struct blake2s_ctx ctx;
--
2.51.1.dirty
^ permalink raw reply related [flat|nested] 20+ messages in thread

* [PATCH 05/10] byteorder: Add le64_to_cpu_array() and cpu_to_le64_array()
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
` (3 preceding siblings ...)
2025-10-18 4:31 ` [PATCH 04/10] lib/crypto: blake2s: Document the BLAKE2s library API Eric Biggers
@ 2025-10-18 4:31 ` Eric Biggers
2025-10-18 4:31 ` [PATCH 06/10] lib/crypto: blake2b: Add BLAKE2b library functions Eric Biggers
` (6 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Eric Biggers @ 2025-10-18 4:31 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld, Eric Biggers
Add le64_to_cpu_array() and cpu_to_le64_array(). These mirror the
corresponding 32-bit functions.
These will be used by the BLAKE2b code.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
include/linux/byteorder/generic.h | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index b3705e8bbe2b8..55a44199de872 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -171,10 +171,26 @@ static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
__cpu_to_le32s(buf);
buf++;
}
}
+static inline void le64_to_cpu_array(u64 *buf, unsigned int words)
+{
+ while (words--) {
+ __le64_to_cpus(buf);
+ buf++;
+ }
+}
+
+static inline void cpu_to_le64_array(u64 *buf, unsigned int words)
+{
+ while (words--) {
+ __cpu_to_le64s(buf);
+ buf++;
+ }
+}
+
static inline void memcpy_from_le32(u32 *dst, const __le32 *src, size_t words)
{
size_t i;
for (i = 0; i < words; i++)
--
2.51.1.dirty
^ permalink raw reply related [flat|nested] 20+ messages in thread

* [PATCH 06/10] lib/crypto: blake2b: Add BLAKE2b library functions
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
` (4 preceding siblings ...)
2025-10-18 4:31 ` [PATCH 05/10] byteorder: Add le64_to_cpu_array() and cpu_to_le64_array() Eric Biggers
@ 2025-10-18 4:31 ` Eric Biggers
2025-10-18 4:31 ` [PATCH 07/10] lib/crypto: arm/blake2b: Migrate optimized code into library Eric Biggers
` (5 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Eric Biggers @ 2025-10-18 4:31 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld, Eric Biggers
Add a library API for BLAKE2b, closely modeled after the BLAKE2s API.
This will allow in-kernel users such as btrfs to use BLAKE2b without
going through the generic crypto layer. In addition, as usual the
BLAKE2b crypto_shash algorithms will be reimplemented on top of this.
Note: to create lib/crypto/blake2b.c I made a copy of
lib/crypto/blake2s.c and made the updates from BLAKE2s => BLAKE2b. This
way, the BLAKE2s and BLAKE2b code is kept consistent. Therefore, it
borrows the SPDX-License-Identifier and Copyright from
lib/crypto/blake2s.c rather than crypto/blake2b_generic.c.
The library API uses 'struct blake2b_ctx', consistent with other
lib/crypto/ APIs. The existing 'struct blake2b_state' will be removed
once the blake2b crypto_shash algorithms are updated to stop using it.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
include/crypto/blake2b.h | 133 ++++++++++++++++++++---
include/crypto/internal/blake2b.h | 17 ++-
lib/crypto/Kconfig | 10 ++
lib/crypto/Makefile | 9 ++
lib/crypto/blake2b.c | 174 ++++++++++++++++++++++++++++++
5 files changed, 330 insertions(+), 13 deletions(-)
create mode 100644 lib/crypto/blake2b.c
diff --git a/include/crypto/blake2b.h b/include/crypto/blake2b.h
index dd7694477e50f..4879e2ec26867 100644
--- a/include/crypto/blake2b.h
+++ b/include/crypto/blake2b.h
@@ -26,10 +26,29 @@ enum blake2b_lengths {
BLAKE2B_256_HASH_SIZE = 32,
BLAKE2B_384_HASH_SIZE = 48,
BLAKE2B_512_HASH_SIZE = 64,
};
+/**
+ * struct blake2b_ctx - Context for hashing a message with BLAKE2b
+ * @h: compression function state
+ * @t: block counter
+ * @f: finalization indicator
+ * @buf: partial block buffer; 'buflen' bytes are valid
+ * @buflen: number of bytes buffered in @buf
+ * @outlen: length of output hash value in bytes, at most BLAKE2B_HASH_SIZE
+ */
+struct blake2b_ctx {
+ /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */
+ u64 h[8];
+ u64 t[2];
+ u64 f[2];
+ u8 buf[BLAKE2B_BLOCK_SIZE];
+ unsigned int buflen;
+ unsigned int outlen;
+};
+
enum blake2b_iv {
BLAKE2B_IV0 = 0x6A09E667F3BCC908ULL,
BLAKE2B_IV1 = 0xBB67AE8584CAA73BULL,
BLAKE2B_IV2 = 0x3C6EF372FE94F82BULL,
BLAKE2B_IV3 = 0xA54FF53A5F1D36F1ULL,
@@ -37,21 +56,111 @@ enum blake2b_iv {
BLAKE2B_IV5 = 0x9B05688C2B3E6C1FULL,
BLAKE2B_IV6 = 0x1F83D9ABFB41BD6BULL,
BLAKE2B_IV7 = 0x5BE0CD19137E2179ULL,
};
-static inline void __blake2b_init(struct blake2b_state *state, size_t outlen,
- size_t keylen)
+static inline void __blake2b_init(struct blake2b_ctx *ctx, size_t outlen,
+ const void *key, size_t keylen)
+{
+ ctx->h[0] = BLAKE2B_IV0 ^ (0x01010000 | keylen << 8 | outlen);
+ ctx->h[1] = BLAKE2B_IV1;
+ ctx->h[2] = BLAKE2B_IV2;
+ ctx->h[3] = BLAKE2B_IV3;
+ ctx->h[4] = BLAKE2B_IV4;
+ ctx->h[5] = BLAKE2B_IV5;
+ ctx->h[6] = BLAKE2B_IV6;
+ ctx->h[7] = BLAKE2B_IV7;
+ ctx->t[0] = 0;
+ ctx->t[1] = 0;
+ ctx->f[0] = 0;
+ ctx->f[1] = 0;
+ ctx->buflen = 0;
+ ctx->outlen = outlen;
+ if (keylen) {
+ memcpy(ctx->buf, key, keylen);
+ memset(&ctx->buf[keylen], 0, BLAKE2B_BLOCK_SIZE - keylen);
+ ctx->buflen = BLAKE2B_BLOCK_SIZE;
+ }
+}
+
+/**
+ * blake2b_init() - Initialize a BLAKE2b context for a new message (unkeyed)
+ * @ctx: the context to initialize
+ * @outlen: length of output hash value in bytes, at most BLAKE2B_HASH_SIZE
+ *
+ * Context: Any context.
+ */
+static inline void blake2b_init(struct blake2b_ctx *ctx, size_t outlen)
+{
+ __blake2b_init(ctx, outlen, NULL, 0);
+}
+
+/**
+ * blake2b_init_key() - Initialize a BLAKE2b context for a new message (keyed)
+ * @ctx: the context to initialize
+ * @outlen: length of output hash value in bytes, at most BLAKE2B_HASH_SIZE
+ * @key: the key
+ * @keylen: the key length in bytes, at most BLAKE2B_KEY_SIZE
+ *
+ * Context: Any context.
+ */
+static inline void blake2b_init_key(struct blake2b_ctx *ctx, size_t outlen,
+ const void *key, size_t keylen)
+{
+ WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2B_HASH_SIZE ||
+ !key || !keylen || keylen > BLAKE2B_KEY_SIZE));
+
+ __blake2b_init(ctx, outlen, key, keylen);
+}
+
+/**
+ * blake2b_update() - Update a BLAKE2b context with message data
+ * @ctx: the context to update; must have been initialized
+ * @in: the message data
+ * @inlen: the data length in bytes
+ *
+ * This can be called any number of times.
+ *
+ * Context: Any context.
+ */
+void blake2b_update(struct blake2b_ctx *ctx, const u8 *in, size_t inlen);
+
+/**
+ * blake2b_final() - Finish computing a BLAKE2b hash
+ * @ctx: the context to finalize; must have been initialized
+ * @out: (output) the resulting BLAKE2b hash. Its length will be equal to the
+ * @outlen that was passed to blake2b_init() or blake2b_init_key().
+ *
+ * After finishing, this zeroizes @ctx. So the caller does not need to do it.
+ *
+ * Context: Any context.
+ */
+void blake2b_final(struct blake2b_ctx *ctx, u8 *out);
+
+/**
+ * blake2b() - Compute BLAKE2b hash in one shot
+ * @key: the key, or NULL for an unkeyed hash
+ * @keylen: the key length in bytes (at most BLAKE2B_KEY_SIZE), or 0 for an
+ * unkeyed hash
+ * @in: the message data
+ * @inlen: the data length in bytes
+ * @out: (output) the resulting BLAKE2b hash, with length @outlen
+ * @outlen: length of output hash value in bytes, at most BLAKE2B_HASH_SIZE
+ *
+ * Context: Any context.
+ */
+static inline void blake2b(const u8 *key, size_t keylen,
+ const u8 *in, size_t inlen,
+ u8 *out, size_t outlen)
{
- state->h[0] = BLAKE2B_IV0 ^ (0x01010000 | keylen << 8 | outlen);
- state->h[1] = BLAKE2B_IV1;
- state->h[2] = BLAKE2B_IV2;
- state->h[3] = BLAKE2B_IV3;
- state->h[4] = BLAKE2B_IV4;
- state->h[5] = BLAKE2B_IV5;
- state->h[6] = BLAKE2B_IV6;
- state->h[7] = BLAKE2B_IV7;
- state->t[0] = 0;
- state->t[1] = 0;
+ struct blake2b_ctx ctx;
+
+ WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen ||
+ outlen > BLAKE2B_HASH_SIZE || keylen > BLAKE2B_KEY_SIZE ||
+ (!key && keylen)));
+
+ __blake2b_init(&ctx, outlen, key, keylen);
+ blake2b_update(&ctx, in, inlen);
+ blake2b_final(&ctx, out);
}
#endif /* _CRYPTO_BLAKE2B_H */
diff --git a/include/crypto/internal/blake2b.h b/include/crypto/internal/blake2b.h
index 3e09e24853060..3712df69def18 100644
--- a/include/crypto/internal/blake2b.h
+++ b/include/crypto/internal/blake2b.h
@@ -55,17 +55,32 @@ static inline int crypto_blake2b_setkey(struct crypto_shash *tfm,
tctx->keylen = keylen;
return 0;
}
+static inline void __crypto_blake2b_init(struct blake2b_state *state,
+ size_t outlen, size_t keylen)
+{
+ state->h[0] = BLAKE2B_IV0 ^ (0x01010000 | keylen << 8 | outlen);
+ state->h[1] = BLAKE2B_IV1;
+ state->h[2] = BLAKE2B_IV2;
+ state->h[3] = BLAKE2B_IV3;
+ state->h[4] = BLAKE2B_IV4;
+ state->h[5] = BLAKE2B_IV5;
+ state->h[6] = BLAKE2B_IV6;
+ state->h[7] = BLAKE2B_IV7;
+ state->t[0] = 0;
+ state->t[1] = 0;
+}
+
static inline int crypto_blake2b_init(struct shash_desc *desc)
{
const struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct blake2b_state *state = shash_desc_ctx(desc);
unsigned int outlen = crypto_shash_digestsize(desc->tfm);
- __blake2b_init(state, outlen, tctx->keylen);
+ __crypto_blake2b_init(state, outlen, tctx->keylen);
return tctx->keylen ?
crypto_shash_update(desc, tctx->key, BLAKE2B_BLOCK_SIZE) : 0;
}
static inline int crypto_blake2b_update_bo(struct shash_desc *desc,
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index eea17e36a22be..045fd79cc1bed 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -26,10 +26,20 @@ config CRYPTO_LIB_ARC4
tristate
config CRYPTO_LIB_GF128MUL
tristate
+config CRYPTO_LIB_BLAKE2B
+ tristate
+ help
+ The BLAKE2b library functions. Select this if your module uses any of
+ the functions from <crypto/blake2b.h>.
+
+config CRYPTO_LIB_BLAKE2B_ARCH
+ bool
+ depends on CRYPTO_LIB_BLAKE2B && !UML
+
# BLAKE2s support is always built-in, so there's no CRYPTO_LIB_BLAKE2S option.
config CRYPTO_LIB_BLAKE2S_ARCH
bool
depends on !UML
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index bded351aeacef..f863417b16817 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -29,10 +29,19 @@ libarc4-y := arc4.o
obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o
################################################################################
+obj-$(CONFIG_CRYPTO_LIB_BLAKE2B) += libblake2b.o
+libblake2b-y := blake2b.o
+CFLAGS_blake2b.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930
+ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2B_ARCH),y)
+CFLAGS_blake2b.o += -I$(src)/$(SRCARCH)
+endif # CONFIG_CRYPTO_LIB_BLAKE2B_ARCH
+
+################################################################################
+
# blake2s is used by the /dev/random driver which is always builtin
obj-y += blake2s.o
ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2S_ARCH),y)
CFLAGS_blake2s.o += -I$(src)/$(SRCARCH)
obj-$(CONFIG_ARM) += arm/blake2s-core.o
diff --git a/lib/crypto/blake2b.c b/lib/crypto/blake2b.c
new file mode 100644
index 0000000000000..09c6d65d8a6e6
--- /dev/null
+++ b/lib/crypto/blake2b.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright 2025 Google LLC
+ *
+ * This is an implementation of the BLAKE2b hash and PRF functions.
+ *
+ * Information: https://blake2.net/
+ */
+
+#include <crypto/blake2b.h>
+#include <linux/bug.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+static const u8 blake2b_sigma[12][16] = {
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
+};
+
+static inline void blake2b_increment_counter(struct blake2b_ctx *ctx, u32 inc)
+{
+ ctx->t[0] += inc;
+ ctx->t[1] += (ctx->t[0] < inc);
+}
+
+static void __maybe_unused
+blake2b_compress_generic(struct blake2b_ctx *ctx,
+ const u8 *data, size_t nblocks, u32 inc)
+{
+ u64 m[16];
+ u64 v[16];
+ int i;
+
+ WARN_ON(IS_ENABLED(DEBUG) &&
+ (nblocks > 1 && inc != BLAKE2B_BLOCK_SIZE));
+
+ while (nblocks > 0) {
+ blake2b_increment_counter(ctx, inc);
+ memcpy(m, data, BLAKE2B_BLOCK_SIZE);
+ le64_to_cpu_array(m, ARRAY_SIZE(m));
+ memcpy(v, ctx->h, 64);
+ v[ 8] = BLAKE2B_IV0;
+ v[ 9] = BLAKE2B_IV1;
+ v[10] = BLAKE2B_IV2;
+ v[11] = BLAKE2B_IV3;
+ v[12] = BLAKE2B_IV4 ^ ctx->t[0];
+ v[13] = BLAKE2B_IV5 ^ ctx->t[1];
+ v[14] = BLAKE2B_IV6 ^ ctx->f[0];
+ v[15] = BLAKE2B_IV7 ^ ctx->f[1];
+
+#define G(r, i, a, b, c, d) do { \
+ a += b + m[blake2b_sigma[r][2 * i + 0]]; \
+ d = ror64(d ^ a, 32); \
+ c += d; \
+ b = ror64(b ^ c, 24); \
+ a += b + m[blake2b_sigma[r][2 * i + 1]]; \
+ d = ror64(d ^ a, 16); \
+ c += d; \
+ b = ror64(b ^ c, 63); \
+} while (0)
+
+#define ROUND(r) do { \
+ G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
+ G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
+ G(r, 2, v[2], v[ 6], v[10], v[14]); \
+ G(r, 3, v[3], v[ 7], v[11], v[15]); \
+ G(r, 4, v[0], v[ 5], v[10], v[15]); \
+ G(r, 5, v[1], v[ 6], v[11], v[12]); \
+ G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
+ G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
+} while (0)
+ ROUND(0);
+ ROUND(1);
+ ROUND(2);
+ ROUND(3);
+ ROUND(4);
+ ROUND(5);
+ ROUND(6);
+ ROUND(7);
+ ROUND(8);
+ ROUND(9);
+ ROUND(10);
+ ROUND(11);
+
+#undef G
+#undef ROUND
+
+ for (i = 0; i < 8; ++i)
+ ctx->h[i] ^= v[i] ^ v[i + 8];
+
+ data += BLAKE2B_BLOCK_SIZE;
+ --nblocks;
+ }
+}
+
+#ifdef CONFIG_CRYPTO_LIB_BLAKE2B_ARCH
+#include "blake2b.h" /* $(SRCARCH)/blake2b.h */
+#else
+#define blake2b_compress blake2b_compress_generic
+#endif
+
+static inline void blake2b_set_lastblock(struct blake2b_ctx *ctx)
+{
+ ctx->f[0] = -1;
+}
+
+void blake2b_update(struct blake2b_ctx *ctx, const u8 *in, size_t inlen)
+{
+ const size_t fill = BLAKE2B_BLOCK_SIZE - ctx->buflen;
+
+ if (unlikely(!inlen))
+ return;
+ if (inlen > fill) {
+ memcpy(ctx->buf + ctx->buflen, in, fill);
+ blake2b_compress(ctx, ctx->buf, 1, BLAKE2B_BLOCK_SIZE);
+ ctx->buflen = 0;
+ in += fill;
+ inlen -= fill;
+ }
+ if (inlen > BLAKE2B_BLOCK_SIZE) {
+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2B_BLOCK_SIZE);
+
+ blake2b_compress(ctx, in, nblocks - 1, BLAKE2B_BLOCK_SIZE);
+ in += BLAKE2B_BLOCK_SIZE * (nblocks - 1);
+ inlen -= BLAKE2B_BLOCK_SIZE * (nblocks - 1);
+ }
+ memcpy(ctx->buf + ctx->buflen, in, inlen);
+ ctx->buflen += inlen;
+}
+EXPORT_SYMBOL(blake2b_update);
+
+void blake2b_final(struct blake2b_ctx *ctx, u8 *out)
+{
+ WARN_ON(IS_ENABLED(DEBUG) && !out);
+ blake2b_set_lastblock(ctx);
+ memset(ctx->buf + ctx->buflen, 0,
+ BLAKE2B_BLOCK_SIZE - ctx->buflen); /* Padding */
+ blake2b_compress(ctx, ctx->buf, 1, ctx->buflen);
+ cpu_to_le64_array(ctx->h, ARRAY_SIZE(ctx->h));
+ memcpy(out, ctx->h, ctx->outlen);
+ memzero_explicit(ctx, sizeof(*ctx));
+}
+EXPORT_SYMBOL(blake2b_final);
+
+#ifdef blake2b_mod_init_arch
+static int __init blake2b_mod_init(void)
+{
+ blake2b_mod_init_arch();
+ return 0;
+}
+subsys_initcall(blake2b_mod_init);
+
+static void __exit blake2b_mod_exit(void)
+{
+}
+module_exit(blake2b_mod_exit);
+#endif
+
+MODULE_DESCRIPTION("BLAKE2b hash function");
+MODULE_LICENSE("GPL");
--
2.51.1.dirty
^ permalink raw reply related [flat|nested] 20+ messages in thread

* [PATCH 07/10] lib/crypto: arm/blake2b: Migrate optimized code into library
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
` (5 preceding siblings ...)
2025-10-18 4:31 ` [PATCH 06/10] lib/crypto: blake2b: Add BLAKE2b library functions Eric Biggers
@ 2025-10-18 4:31 ` Eric Biggers
2025-10-19 16:32 ` Eric Biggers
2025-10-18 4:31 ` [PATCH 08/10] lib/crypto: tests: Add KUnit tests for BLAKE2b Eric Biggers
` (4 subsequent siblings)
11 siblings, 1 reply; 20+ messages in thread
From: Eric Biggers @ 2025-10-18 4:31 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld, Eric Biggers
Migrate the arm-optimized BLAKE2b code from arch/arm/crypto/ to
lib/crypto/arm/. This makes the BLAKE2b library able to use it, and it
also simplifies the code because it's easier to integrate with the
library than crypto_shash.
This temporarily makes the arm-optimized BLAKE2b code unavailable via
crypto_shash. A later commit reimplements the blake2b-* crypto_shash
algorithms on top of the BLAKE2b library API, making it available again.
Note that as per the lib/crypto/ convention, the optimized code is now
enabled by default. So, this also fixes the longstanding issue where
the optimized BLAKE2b code was not enabled by default.
To see the diff from arch/arm/crypto/blake2b-neon-glue.c to
lib/crypto/arm/blake2b.h, view this commit with 'git show -M10'.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
arch/arm/crypto/Kconfig | 16 ---
arch/arm/crypto/Makefile | 2 -
arch/arm/crypto/blake2b-neon-glue.c | 104 ------------------
lib/crypto/Kconfig | 1 +
lib/crypto/Makefile | 1 +
.../crypto/arm}/blake2b-neon-core.S | 29 ++---
lib/crypto/arm/blake2b.h | 41 +++++++
7 files changed, 59 insertions(+), 135 deletions(-)
delete mode 100644 arch/arm/crypto/blake2b-neon-glue.c
rename {arch/arm/crypto => lib/crypto/arm}/blake2b-neon-core.S (94%)
create mode 100644 lib/crypto/arm/blake2b.h
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index c436eec22d86c..f30d743df2643 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -31,26 +31,10 @@ config CRYPTO_NHPOLY1305_NEON
NHPoly1305 hash function (Adiantum)
Architecture: arm using:
- NEON (Advanced SIMD) extensions
-config CRYPTO_BLAKE2B_NEON
- tristate "Hash functions: BLAKE2b (NEON)"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLAKE2B
- help
- BLAKE2b cryptographic hash function (RFC 7693)
-
- Architecture: arm using
- - NEON (Advanced SIMD) extensions
-
- BLAKE2b digest algorithm optimized with ARM NEON instructions.
- On ARM processors that have NEON support but not the ARMv8
- Crypto Extensions, typically this BLAKE2b implementation is
- much faster than the SHA-2 family and slightly faster than
- SHA-1.
-
config CRYPTO_AES_ARM
tristate "Ciphers: AES"
select CRYPTO_ALGAPI
select CRYPTO_AES
help
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 6346a73effc06..86dd43313dbfd 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -3,17 +3,15 @@
# Arch-specific CryptoAPI modules.
#
obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
-obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
-blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o
aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
diff --git a/arch/arm/crypto/blake2b-neon-glue.c b/arch/arm/crypto/blake2b-neon-glue.c
deleted file mode 100644
index 2ff443a91724f..0000000000000
--- a/arch/arm/crypto/blake2b-neon-glue.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * BLAKE2b digest algorithm, NEON accelerated
- *
- * Copyright 2020 Google LLC
- */
-
-#include <crypto/internal/blake2b.h>
-#include <crypto/internal/hash.h>
-
-#include <linux/module.h>
-#include <linux/sizes.h>
-
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void blake2b_compress_neon(struct blake2b_state *state,
- const u8 *block, size_t nblocks, u32 inc);
-
-static void blake2b_compress_arch(struct blake2b_state *state,
- const u8 *block, size_t nblocks, u32 inc)
-{
- do {
- const size_t blocks = min_t(size_t, nblocks,
- SZ_4K / BLAKE2B_BLOCK_SIZE);
-
- kernel_neon_begin();
- blake2b_compress_neon(state, block, blocks, inc);
- kernel_neon_end();
-
- nblocks -= blocks;
- block += blocks * BLAKE2B_BLOCK_SIZE;
- } while (nblocks);
-}
-
-static int crypto_blake2b_update_neon(struct shash_desc *desc,
- const u8 *in, unsigned int inlen)
-{
- return crypto_blake2b_update_bo(desc, in, inlen, blake2b_compress_arch);
-}
-
-static int crypto_blake2b_finup_neon(struct shash_desc *desc, const u8 *in,
- unsigned int inlen, u8 *out)
-{
- return crypto_blake2b_finup(desc, in, inlen, out,
- blake2b_compress_arch);
-}
-
-#define BLAKE2B_ALG(name, driver_name, digest_size) \
- { \
- .base.cra_name = name, \
- .base.cra_driver_name = driver_name, \
- .base.cra_priority = 200, \
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY | \
- CRYPTO_AHASH_ALG_BLOCK_ONLY | \
- CRYPTO_AHASH_ALG_FINAL_NONZERO, \
- .base.cra_blocksize = BLAKE2B_BLOCK_SIZE, \
- .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), \
- .base.cra_module = THIS_MODULE, \
- .digestsize = digest_size, \
- .setkey = crypto_blake2b_setkey, \
- .init = crypto_blake2b_init, \
- .update = crypto_blake2b_update_neon, \
- .finup = crypto_blake2b_finup_neon, \
- .descsize = sizeof(struct blake2b_state), \
- .statesize = BLAKE2B_STATE_SIZE, \
- }
-
-static struct shash_alg blake2b_neon_algs[] = {
- BLAKE2B_ALG("blake2b-160", "blake2b-160-neon", BLAKE2B_160_HASH_SIZE),
- BLAKE2B_ALG("blake2b-256", "blake2b-256-neon", BLAKE2B_256_HASH_SIZE),
- BLAKE2B_ALG("blake2b-384", "blake2b-384-neon", BLAKE2B_384_HASH_SIZE),
- BLAKE2B_ALG("blake2b-512", "blake2b-512-neon", BLAKE2B_512_HASH_SIZE),
-};
-
-static int __init blake2b_neon_mod_init(void)
-{
- if (!(elf_hwcap & HWCAP_NEON))
- return -ENODEV;
-
- return crypto_register_shashes(blake2b_neon_algs,
- ARRAY_SIZE(blake2b_neon_algs));
-}
-
-static void __exit blake2b_neon_mod_exit(void)
-{
- crypto_unregister_shashes(blake2b_neon_algs,
- ARRAY_SIZE(blake2b_neon_algs));
-}
-
-module_init(blake2b_neon_mod_init);
-module_exit(blake2b_neon_mod_exit);
-
-MODULE_DESCRIPTION("BLAKE2b digest algorithm, NEON accelerated");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
-MODULE_ALIAS_CRYPTO("blake2b-160");
-MODULE_ALIAS_CRYPTO("blake2b-160-neon");
-MODULE_ALIAS_CRYPTO("blake2b-256");
-MODULE_ALIAS_CRYPTO("blake2b-256-neon");
-MODULE_ALIAS_CRYPTO("blake2b-384");
-MODULE_ALIAS_CRYPTO("blake2b-384-neon");
-MODULE_ALIAS_CRYPTO("blake2b-512");
-MODULE_ALIAS_CRYPTO("blake2b-512-neon");
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 045fd79cc1bed..56456eb786bf3 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -35,10 +35,11 @@ config CRYPTO_LIB_BLAKE2B
the functions from <crypto/blake2b.h>.
config CRYPTO_LIB_BLAKE2B_ARCH
bool
depends on CRYPTO_LIB_BLAKE2B && !UML
+ default y if ARM && KERNEL_MODE_NEON
# BLAKE2s support is always built-in, so there's no CRYPTO_LIB_BLAKE2S option.
config CRYPTO_LIB_BLAKE2S_ARCH
bool
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index f863417b16817..5c9a933928188 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -34,10 +34,11 @@ obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o
obj-$(CONFIG_CRYPTO_LIB_BLAKE2B) += libblake2b.o
libblake2b-y := blake2b.o
CFLAGS_blake2b.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930
ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2B_ARCH),y)
CFLAGS_blake2b.o += -I$(src)/$(SRCARCH)
+obj-$(CONFIG_ARM) += arm/blake2b-neon-core.o
endif # CONFIG_CRYPTO_LIB_BLAKE2B_ARCH
################################################################################
# blake2s is used by the /dev/random driver which is always builtin
diff --git a/arch/arm/crypto/blake2b-neon-core.S b/lib/crypto/arm/blake2b-neon-core.S
similarity index 94%
rename from arch/arm/crypto/blake2b-neon-core.S
rename to lib/crypto/arm/blake2b-neon-core.S
index 0406a186377fb..b55c37f0b88fb 100644
--- a/arch/arm/crypto/blake2b-neon-core.S
+++ b/lib/crypto/arm/blake2b-neon-core.S
@@ -1,8 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * BLAKE2b digest algorithm, NEON accelerated
+ * BLAKE2b digest algorithm optimized with ARM NEON instructions. On ARM
+ * processors that have NEON support but not the ARMv8 Crypto Extensions,
+ * typically this BLAKE2b implementation is much faster than the SHA-2 family
+ * and slightly faster than SHA-1.
*
* Copyright 2020 Google LLC
*
* Author: Eric Biggers <ebiggers@google.com>
*/
@@ -11,12 +14,12 @@
.text
.fpu neon
// The arguments to blake2b_compress_neon()
- STATE .req r0
- BLOCK .req r1
+ CTX .req r0
+ DATA .req r1
NBLOCKS .req r2
INC .req r3
// Pointers to the rotation tables
ROR24_TABLE .req r4
@@ -232,14 +235,14 @@
vld1.8 {q8-q9}, [sp, :256]
.endif
.endm
//
-// void blake2b_compress_neon(struct blake2b_state *state,
-// const u8 *block, size_t nblocks, u32 inc);
+// void blake2b_compress_neon(struct blake2b_ctx *ctx,
+// const u8 *data, size_t nblocks, u32 inc);
//
-// Only the first three fields of struct blake2b_state are used:
+// Only the first three fields of struct blake2b_ctx are used:
// u64 h[8]; (inout)
// u64 t[2]; (inout)
// u64 f[2]; (in)
//
.align 5
@@ -253,11 +256,11 @@ ENTRY(blake2b_compress_neon)
mov sp, ip
adr ROR24_TABLE, .Lror24_table
adr ROR16_TABLE, .Lror16_table
- mov ip, STATE
+ mov ip, CTX
vld1.64 {q0-q1}, [ip]! // Load h[0..3]
vld1.64 {q2-q3}, [ip]! // Load h[4..7]
.Lnext_block:
adr r10, .Lblake2b_IV
vld1.64 {q14-q15}, [ip] // Load t[0..1] and f[0..1]
@@ -279,18 +282,18 @@ ENTRY(blake2b_compress_neon)
// registers than the state registers, as the message doesn't change.
// Therefore we store a copy of the first 32 bytes of the message block
// (q8-q9) in an aligned buffer on the stack so that they can be
// reloaded when needed. (We could just reload directly from the
// message buffer, but it's faster to use aligned loads.)
- vld1.8 {q8-q9}, [BLOCK]!
+ vld1.8 {q8-q9}, [DATA]!
veor q6, q6, q14 // v[12..13] = IV[4..5] ^ t[0..1]
- vld1.8 {q10-q11}, [BLOCK]!
+ vld1.8 {q10-q11}, [DATA]!
veor q7, q7, q15 // v[14..15] = IV[6..7] ^ f[0..1]
- vld1.8 {q12-q13}, [BLOCK]!
+ vld1.8 {q12-q13}, [DATA]!
vst1.8 {q8-q9}, [sp, :256]
- mov ip, STATE
- vld1.8 {q14-q15}, [BLOCK]!
+ mov ip, CTX
+ vld1.8 {q14-q15}, [DATA]!
// Execute the rounds. Each round is provided the order in which it
// needs to use the message words.
_blake2b_round 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
_blake2b_round 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3
@@ -317,11 +320,11 @@ ENTRY(blake2b_compress_neon)
vld1.64 {q10-q11}, [ip] // Load old h[4..7]
veor q2, q2, q6 // v[4..5] ^= v[12..13]
veor q3, q3, q7 // v[6..7] ^= v[14..15]
veor q0, q0, q8 // v[0..1] ^= h[0..1]
veor q1, q1, q9 // v[2..3] ^= h[2..3]
- mov ip, STATE
+ mov ip, CTX
subs NBLOCKS, NBLOCKS, #1 // nblocks--
vst1.64 {q0-q1}, [ip]! // Store new h[0..3]
veor q2, q2, q10 // v[4..5] ^= h[4..5]
veor q3, q3, q11 // v[6..7] ^= h[6..7]
vst1.64 {q2-q3}, [ip]! // Store new h[4..7]
diff --git a/lib/crypto/arm/blake2b.h b/lib/crypto/arm/blake2b.h
new file mode 100644
index 0000000000000..1b9154d119db4
--- /dev/null
+++ b/lib/crypto/arm/blake2b.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * BLAKE2b digest algorithm, NEON accelerated
+ *
+ * Copyright 2020 Google LLC
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+asmlinkage void blake2b_compress_neon(struct blake2b_ctx *ctx,
+ const u8 *data, size_t nblocks, u32 inc);
+
+static void blake2b_compress(struct blake2b_ctx *ctx,
+ const u8 *data, size_t nblocks, u32 inc)
+{
+ if (!static_branch_likely(&have_neon) || !may_use_simd()) {
+ blake2b_compress_generic(ctx, data, nblocks, inc);
+ return;
+ }
+ do {
+ const size_t blocks = min_t(size_t, nblocks,
+ SZ_4K / BLAKE2B_BLOCK_SIZE);
+
+ kernel_neon_begin();
+ blake2b_compress_neon(ctx, data, blocks, inc);
+ kernel_neon_end();
+
+ data += blocks * BLAKE2B_BLOCK_SIZE;
+ nblocks -= blocks;
+ } while (nblocks);
+}
+
+#define blake2b_mod_init_arch blake2b_mod_init_arch
+static void blake2b_mod_init_arch(void)
+{
+ if (elf_hwcap & HWCAP_NEON)
+ static_branch_enable(&have_neon);
+}
--
2.51.1.dirty
^ permalink raw reply related [flat|nested] 20+ messages in thread* Re: [PATCH 07/10] lib/crypto: arm/blake2b: Migrate optimized code into library
2025-10-18 4:31 ` [PATCH 07/10] lib/crypto: arm/blake2b: Migrate optimized code into library Eric Biggers
@ 2025-10-19 16:32 ` Eric Biggers
0 siblings, 0 replies; 20+ messages in thread
From: Eric Biggers @ 2025-10-19 16:32 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld
On Fri, Oct 17, 2025 at 09:31:03PM -0700, Eric Biggers wrote:
> diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
> index f863417b16817..5c9a933928188 100644
> --- a/lib/crypto/Makefile
> +++ b/lib/crypto/Makefile
> @@ -34,10 +34,11 @@ obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o
> obj-$(CONFIG_CRYPTO_LIB_BLAKE2B) += libblake2b.o
> libblake2b-y := blake2b.o
> CFLAGS_blake2b.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930
> ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2B_ARCH),y)
> CFLAGS_blake2b.o += -I$(src)/$(SRCARCH)
> +obj-$(CONFIG_ARM) += arm/blake2b-neon-core.o
> endif # CONFIG_CRYPTO_LIB_BLAKE2B_ARCH
Correction: it should be
libblake2b-$(CONFIG_ARM) += arm/blake2b-neon-core.o
- Eric
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH 08/10] lib/crypto: tests: Add KUnit tests for BLAKE2b
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
` (6 preceding siblings ...)
2025-10-18 4:31 ` [PATCH 07/10] lib/crypto: arm/blake2b: Migrate optimized code into library Eric Biggers
@ 2025-10-18 4:31 ` Eric Biggers
2025-10-18 4:31 ` [PATCH 09/10] crypto: blake2b - Reimplement using library API Eric Biggers
` (3 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Eric Biggers @ 2025-10-18 4:31 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld, Eric Biggers
Add a KUnit test suite for the BLAKE2b library API, mirroring the
BLAKE2s test suite very closely.
As with the BLAKE2s test suite, a benchmark is included.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
lib/crypto/tests/Kconfig | 9 +
lib/crypto/tests/Makefile | 1 +
lib/crypto/tests/blake2b-testvecs.h | 342 ++++++++++++++++++++++++++++
lib/crypto/tests/blake2b_kunit.c | 133 +++++++++++
scripts/crypto/gen-hash-testvecs.py | 29 +--
5 files changed, 501 insertions(+), 13 deletions(-)
create mode 100644 lib/crypto/tests/blake2b-testvecs.h
create mode 100644 lib/crypto/tests/blake2b_kunit.c
diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig
index 578af717e13a7..2ebfd681bae4d 100644
--- a/lib/crypto/tests/Kconfig
+++ b/lib/crypto/tests/Kconfig
@@ -1,7 +1,16 @@
# SPDX-License-Identifier: GPL-2.0-or-later
+config CRYPTO_LIB_BLAKE2B_KUNIT_TEST
+ tristate "KUnit tests for BLAKE2b" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS
+ select CRYPTO_LIB_BENCHMARK_VISIBLE
+ select CRYPTO_LIB_BLAKE2B
+ help
+ KUnit tests for the BLAKE2b cryptographic hash function.
+
config CRYPTO_LIB_BLAKE2S_KUNIT_TEST
tristate "KUnit tests for BLAKE2s" if !KUNIT_ALL_TESTS
depends on KUNIT
default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS
select CRYPTO_LIB_BENCHMARK_VISIBLE
diff --git a/lib/crypto/tests/Makefile b/lib/crypto/tests/Makefile
index a71fad19922ba..f21a48a4415d0 100644
--- a/lib/crypto/tests/Makefile
+++ b/lib/crypto/tests/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-or-later
+obj-$(CONFIG_CRYPTO_LIB_BLAKE2B_KUNIT_TEST) += blake2b_kunit.o
obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_KUNIT_TEST) += blake2s_kunit.o
obj-$(CONFIG_CRYPTO_LIB_CURVE25519_KUNIT_TEST) += curve25519_kunit.o
obj-$(CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST) += md5_kunit.o
obj-$(CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST) += poly1305_kunit.o
obj-$(CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST) += sha1_kunit.o
diff --git a/lib/crypto/tests/blake2b-testvecs.h b/lib/crypto/tests/blake2b-testvecs.h
new file mode 100644
index 0000000000000..9e407dbc219c9
--- /dev/null
+++ b/lib/crypto/tests/blake2b-testvecs.h
@@ -0,0 +1,342 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* This file was generated by: ./scripts/crypto/gen-hash-testvecs.py blake2b */
+
+static const struct {
+ size_t data_len;
+ u8 digest[BLAKE2B_HASH_SIZE];
+} hash_testvecs[] = {
+ {
+ .data_len = 0,
+ .digest = {
+ 0x78, 0x6a, 0x02, 0xf7, 0x42, 0x01, 0x59, 0x03,
+ 0xc6, 0xc6, 0xfd, 0x85, 0x25, 0x52, 0xd2, 0x72,
+ 0x91, 0x2f, 0x47, 0x40, 0xe1, 0x58, 0x47, 0x61,
+ 0x8a, 0x86, 0xe2, 0x17, 0xf7, 0x1f, 0x54, 0x19,
+ 0xd2, 0x5e, 0x10, 0x31, 0xaf, 0xee, 0x58, 0x53,
+ 0x13, 0x89, 0x64, 0x44, 0x93, 0x4e, 0xb0, 0x4b,
+ 0x90, 0x3a, 0x68, 0x5b, 0x14, 0x48, 0xb7, 0x55,
+ 0xd5, 0x6f, 0x70, 0x1a, 0xfe, 0x9b, 0xe2, 0xce,
+ },
+ },
+ {
+ .data_len = 1,
+ .digest = {
+ 0x6f, 0x2e, 0xcc, 0x83, 0x53, 0xa3, 0x20, 0x16,
+ 0x5b, 0xda, 0xd0, 0x04, 0xd3, 0xcb, 0xe4, 0x37,
+ 0x5b, 0xf0, 0x84, 0x36, 0xe1, 0xad, 0x45, 0xcc,
+ 0x4d, 0x7f, 0x09, 0x68, 0xb2, 0x62, 0x93, 0x7f,
+ 0x72, 0x32, 0xe8, 0xa7, 0x2f, 0x1f, 0x6f, 0xc6,
+ 0x14, 0xd6, 0x70, 0xae, 0x0c, 0xf0, 0xf3, 0xce,
+ 0x64, 0x4d, 0x22, 0xdf, 0xc7, 0xa7, 0xf8, 0xa8,
+ 0x18, 0x23, 0xd8, 0x6c, 0xaf, 0x65, 0xa2, 0x54,
+ },
+ },
+ {
+ .data_len = 2,
+ .digest = {
+ 0x04, 0x13, 0xe2, 0x10, 0xbe, 0x65, 0xde, 0xce,
+ 0x61, 0xa8, 0xe0, 0xd6, 0x35, 0xb1, 0xb8, 0x88,
+ 0xd2, 0xea, 0x45, 0x3a, 0xe1, 0x8d, 0x94, 0xb5,
+ 0x66, 0x06, 0x98, 0x96, 0x39, 0xf8, 0x0e, 0xcb,
+ 0x34, 0xa6, 0xa8, 0x17, 0xfe, 0x56, 0xbc, 0xa9,
+ 0x5e, 0x1b, 0xb1, 0xde, 0x3c, 0xc7, 0x78, 0x4f,
+ 0x39, 0xc6, 0xfc, 0xa8, 0xb3, 0x27, 0x66, 0x3e,
+ 0x4e, 0xb5, 0x5d, 0x08, 0x89, 0xee, 0xd1, 0xe0,
+ },
+ },
+ {
+ .data_len = 3,
+ .digest = {
+ 0x2b, 0x4a, 0xa3, 0x4e, 0x2b, 0x7a, 0x47, 0x20,
+ 0x30, 0x5b, 0x09, 0x17, 0x3a, 0xf4, 0xcc, 0xf0,
+ 0xf7, 0x7b, 0x97, 0x68, 0x98, 0x9f, 0x4f, 0x09,
+ 0x46, 0x25, 0xe7, 0xd6, 0x53, 0x6b, 0xf9, 0x68,
+ 0x48, 0x12, 0x44, 0x8c, 0x9a, 0xc8, 0xd4, 0x42,
+ 0xeb, 0x2c, 0x5f, 0x41, 0xba, 0x17, 0xd0, 0xc3,
+ 0xad, 0xfd, 0xfb, 0x42, 0x33, 0xcb, 0x08, 0x5d,
+ 0xd2, 0x5c, 0x3d, 0xde, 0x87, 0x4d, 0xd6, 0xe4,
+ },
+ },
+ {
+ .data_len = 16,
+ .digest = {
+ 0xbf, 0x40, 0xf2, 0x38, 0x44, 0x8e, 0x24, 0x5e,
+ 0xbc, 0x67, 0xbb, 0xf0, 0x10, 0x9a, 0x79, 0xbb,
+ 0x36, 0x55, 0xce, 0xd2, 0xba, 0x04, 0x0d, 0xe8,
+ 0x30, 0x29, 0x5c, 0x2a, 0xa6, 0x3a, 0x4f, 0x37,
+ 0xac, 0x5f, 0xd4, 0x13, 0xa2, 0xf4, 0xfe, 0x80,
+ 0x61, 0xd7, 0x58, 0x66, 0x0c, 0x7f, 0xa2, 0x56,
+ 0x6b, 0x52, 0x7c, 0x22, 0x73, 0x7f, 0x17, 0xaa,
+ 0x91, 0x5a, 0x22, 0x06, 0xd9, 0x00, 0x48, 0x12,
+ },
+ },
+ {
+ .data_len = 32,
+ .digest = {
+ 0x41, 0x04, 0x65, 0x93, 0x81, 0x9a, 0x20, 0x0a,
+ 0x00, 0x60, 0x00, 0x64, 0x4c, 0x04, 0x3d, 0xe0,
+ 0x6b, 0x17, 0x0c, 0xe1, 0x0e, 0x28, 0x8b, 0xa0,
+ 0x76, 0xd2, 0x79, 0xb0, 0x33, 0x60, 0x61, 0x27,
+ 0xf2, 0x64, 0xf1, 0x8a, 0xe5, 0x3e, 0xaa, 0x37,
+ 0x60, 0xad, 0x2d, 0x75, 0x13, 0xae, 0xd8, 0x9e,
+ 0xec, 0xe0, 0xe4, 0x40, 0x2f, 0x59, 0x44, 0xb0,
+ 0x66, 0x7a, 0x68, 0x38, 0xce, 0x21, 0x99, 0x2a,
+ },
+ },
+ {
+ .data_len = 48,
+ .digest = {
+ 0x19, 0x6f, 0x9d, 0xc7, 0x87, 0x12, 0x5c, 0xa3,
+ 0xe2, 0xd3, 0xf1, 0x82, 0xec, 0xf3, 0x55, 0x9c,
+ 0x86, 0xd1, 0x6d, 0xde, 0xcf, 0x5b, 0xec, 0x4c,
+ 0x43, 0x25, 0x85, 0x90, 0xef, 0xe8, 0xe3, 0x5f,
+ 0x2c, 0x3a, 0x84, 0x07, 0xb8, 0x55, 0xfd, 0x5e,
+ 0xa4, 0x45, 0xf2, 0xac, 0xe4, 0xbd, 0xc7, 0x96,
+ 0x80, 0x59, 0x3e, 0xc9, 0xb1, 0x60, 0xb1, 0x2b,
+ 0x17, 0x49, 0x7d, 0x3e, 0x7d, 0x4d, 0x70, 0x24,
+ },
+ },
+ {
+ .data_len = 49,
+ .digest = {
+ 0x73, 0x72, 0xd5, 0x0a, 0x97, 0xb4, 0x7d, 0xdb,
+ 0x05, 0x14, 0x8e, 0x40, 0xc2, 0x9a, 0x8a, 0x74,
+ 0x4b, 0xda, 0x7e, 0xfc, 0x97, 0x57, 0x23, 0x39,
+ 0xdc, 0x57, 0x09, 0x13, 0x24, 0xfc, 0xf3, 0x23,
+ 0x55, 0x48, 0xdd, 0xe5, 0x07, 0x9a, 0x6f, 0x7b,
+ 0x62, 0xea, 0x4d, 0x79, 0xb4, 0xb9, 0xc5, 0x86,
+ 0xc0, 0x34, 0xd6, 0xd2, 0x6c, 0xc3, 0x94, 0xfb,
+ 0x34, 0xd6, 0x62, 0xae, 0xb8, 0x99, 0xf1, 0x38,
+ },
+ },
+ {
+ .data_len = 63,
+ .digest = {
+ 0x42, 0x3a, 0xe3, 0xa2, 0xae, 0x5a, 0x28, 0xce,
+ 0xf1, 0x3c, 0x97, 0xc2, 0x34, 0xf6, 0xb5, 0x1e,
+ 0xfc, 0x31, 0xb4, 0x04, 0x61, 0xb7, 0x54, 0x0b,
+ 0x0d, 0x1a, 0x22, 0x9c, 0x04, 0x67, 0x5c, 0x4c,
+ 0x75, 0x1b, 0x10, 0x0b, 0x99, 0xe2, 0xb1, 0x5e,
+ 0x5d, 0x4b, 0x7a, 0xe6, 0xf6, 0xb5, 0x62, 0xee,
+ 0x2d, 0x44, 0x57, 0xb2, 0x96, 0x73, 0x5e, 0xb9,
+ 0x6a, 0xb2, 0xb3, 0x16, 0xa3, 0xd9, 0x6a, 0x60,
+ },
+ },
+ {
+ .data_len = 64,
+ .digest = {
+ 0x50, 0xb9, 0xbe, 0xb2, 0x69, 0x07, 0x45, 0x5b,
+ 0x59, 0xde, 0x8d, 0xbf, 0x08, 0xdc, 0x2e, 0x7f,
+ 0x93, 0x29, 0xc1, 0x91, 0xe8, 0x74, 0x03, 0x89,
+ 0x20, 0xfb, 0xb2, 0x4b, 0xe8, 0x68, 0x6f, 0xe1,
+ 0xb4, 0x30, 0xbe, 0x11, 0x3c, 0x43, 0x19, 0x66,
+ 0x72, 0x78, 0xb7, 0xf4, 0xe9, 0x09, 0x18, 0x4e,
+ 0xae, 0x4a, 0x24, 0xe0, 0x6f, 0x44, 0x02, 0xe3,
+ 0xfd, 0xda, 0xb3, 0x3e, 0x3c, 0x6d, 0x54, 0x2e,
+ },
+ },
+ {
+ .data_len = 65,
+ .digest = {
+ 0xd6, 0xf2, 0xa9, 0x61, 0x3f, 0xce, 0x2a, 0x68,
+ 0x19, 0x86, 0xff, 0xd1, 0xee, 0x89, 0x3b, 0xa4,
+ 0x10, 0x9a, 0x91, 0x50, 0x35, 0x48, 0x9e, 0xf5,
+ 0x9c, 0x95, 0xe0, 0xfb, 0x92, 0x0f, 0xa8, 0xf7,
+ 0x6c, 0x43, 0x85, 0xf1, 0x6e, 0x11, 0x4e, 0x67,
+ 0x78, 0xd7, 0x53, 0x25, 0x0c, 0xf8, 0xce, 0x38,
+ 0x74, 0x08, 0xb0, 0x3c, 0x53, 0x20, 0x4d, 0xc4,
+ 0x9a, 0xf5, 0x78, 0xe8, 0x41, 0x8f, 0xed, 0x1f,
+ },
+ },
+ {
+ .data_len = 127,
+ .digest = {
+ 0xe8, 0xb2, 0xc5, 0xa7, 0xf5, 0xfa, 0xee, 0xa0,
+ 0x57, 0xba, 0x58, 0xf9, 0x0a, 0xf2, 0x64, 0x16,
+ 0xa8, 0xa6, 0x03, 0x85, 0x3b, 0xb8, 0x6f, 0xca,
+ 0x76, 0xc3, 0xa1, 0x2b, 0xec, 0xef, 0xc4, 0x66,
+ 0x11, 0xdf, 0x03, 0x85, 0x9d, 0x0c, 0x37, 0x7b,
+ 0xa9, 0x7b, 0x44, 0xfb, 0x11, 0x8f, 0x3f, 0x71,
+ 0xcd, 0x81, 0x43, 0x2e, 0x71, 0x5c, 0x54, 0x9f,
+ 0xca, 0x0f, 0x01, 0x91, 0xca, 0xaa, 0x93, 0xe9,
+ },
+ },
+ {
+ .data_len = 128,
+ .digest = {
+ 0x05, 0x8e, 0x9d, 0xdc, 0xe9, 0x36, 0x3e, 0x73,
+ 0x63, 0x59, 0x69, 0x81, 0x0b, 0x8c, 0xc7, 0x9e,
+ 0xcc, 0xe7, 0x9c, 0x19, 0x54, 0xa7, 0x2f, 0x86,
+ 0xb5, 0xea, 0xae, 0x6d, 0xfe, 0x4e, 0x6e, 0x83,
+ 0x8d, 0x1a, 0x1c, 0x70, 0x3f, 0x34, 0xa1, 0x04,
+ 0x59, 0xd1, 0xbb, 0xaa, 0x58, 0xf7, 0xce, 0xfb,
+ 0x86, 0x66, 0x22, 0xfc, 0x78, 0x74, 0x6e, 0x85,
+ 0xf1, 0x59, 0x7d, 0x9e, 0x1c, 0x3b, 0xc6, 0x65,
+ },
+ },
+ {
+ .data_len = 129,
+ .digest = {
+ 0x6b, 0x1f, 0x7c, 0x9a, 0x65, 0x7f, 0x09, 0x61,
+ 0xe5, 0x04, 0x9a, 0xf1, 0x4b, 0x36, 0x8e, 0x41,
+ 0x86, 0xcf, 0x86, 0x19, 0xd8, 0xc9, 0x34, 0x70,
+ 0x67, 0xd1, 0x03, 0x72, 0x12, 0xf7, 0x27, 0x92,
+ 0x2e, 0x3d, 0x2b, 0x54, 0x9a, 0x48, 0xa4, 0xc2,
+ 0x61, 0xea, 0x6a, 0xe8, 0xdd, 0x07, 0x41, 0x85,
+ 0x58, 0x6d, 0xcd, 0x12, 0x0d, 0xbc, 0xb1, 0x23,
+ 0xb2, 0xdb, 0x24, 0x1f, 0xc4, 0xa7, 0xae, 0xda,
+ },
+ },
+ {
+ .data_len = 256,
+ .digest = {
+ 0x50, 0xd8, 0xdc, 0xb2, 0x50, 0x24, 0x7a, 0x49,
+ 0xb1, 0x00, 0x73, 0x16, 0x1f, 0xce, 0xf9, 0xe8,
+ 0x77, 0x0a, 0x27, 0x74, 0xc7, 0xeb, 0xf0, 0x62,
+ 0xb9, 0xf3, 0x24, 0xa6, 0x03, 0x18, 0x40, 0xde,
+ 0x9b, 0x1d, 0xa8, 0xd0, 0xbf, 0x66, 0xa3, 0xc1,
+ 0x31, 0x04, 0x95, 0xc7, 0xc3, 0xb7, 0x11, 0xe2,
+ 0x1e, 0x31, 0x49, 0x98, 0x06, 0xab, 0xf0, 0xe6,
+ 0x5c, 0xac, 0x88, 0x28, 0x0b, 0x3d, 0xb2, 0xc2,
+ },
+ },
+ {
+ .data_len = 511,
+ .digest = {
+ 0xd4, 0x2b, 0x6b, 0x9e, 0xfc, 0x44, 0xc0, 0x90,
+ 0x64, 0x77, 0x5d, 0xf3, 0x44, 0xb6, 0x92, 0x8f,
+ 0x80, 0xe2, 0xe4, 0x9b, 0xaf, 0x49, 0x04, 0xea,
+ 0x29, 0xf7, 0x4a, 0x33, 0x3f, 0xc7, 0x3b, 0xab,
+ 0xa1, 0x71, 0x7f, 0xa2, 0x8e, 0x03, 0xa0, 0xd6,
+ 0xa7, 0xcd, 0xe0, 0xf8, 0xd7, 0x3b, 0xa4, 0x0d,
+ 0x84, 0x79, 0x12, 0x72, 0x3f, 0x8e, 0x48, 0x35,
+ 0x76, 0x4f, 0x56, 0xe9, 0x21, 0x40, 0x19, 0xbe,
+ },
+ },
+ {
+ .data_len = 513,
+ .digest = {
+ 0x84, 0xd4, 0xd8, 0x6c, 0x60, 0x3d, 0x6e, 0xfd,
+ 0x84, 0xb7, 0xdf, 0xba, 0x13, 0x5e, 0x07, 0x94,
+ 0x5b, 0x6b, 0x62, 0x1d, 0x82, 0x02, 0xa7, 0xb3,
+ 0x21, 0xdf, 0x42, 0x20, 0x85, 0xa8, 0x6f, 0x30,
+ 0xf7, 0x03, 0xba, 0x66, 0x0e, 0xa6, 0x42, 0x21,
+ 0x37, 0xe8, 0xed, 0x5b, 0x22, 0xf5, 0x4e, 0xa5,
+ 0xe5, 0x80, 0x1b, 0x47, 0xf0, 0x49, 0xb3, 0xe5,
+ 0x6e, 0xd9, 0xd9, 0x95, 0x3d, 0x2e, 0x42, 0x13,
+ },
+ },
+ {
+ .data_len = 1000,
+ .digest = {
+ 0x71, 0x17, 0xab, 0x93, 0xfe, 0x3b, 0xa4, 0xe6,
+ 0xcb, 0xb0, 0xea, 0x95, 0xe7, 0x1a, 0x01, 0xc0,
+ 0x12, 0x33, 0xfe, 0xcc, 0x79, 0x15, 0xae, 0x56,
+ 0xd2, 0x70, 0x44, 0x60, 0x54, 0x42, 0xa8, 0x69,
+ 0x7e, 0xc3, 0x90, 0xa0, 0x0c, 0x63, 0x39, 0xff,
+ 0x55, 0x53, 0xb8, 0x46, 0xef, 0x06, 0xcb, 0xba,
+ 0x73, 0xf4, 0x76, 0x22, 0xf1, 0x60, 0x98, 0xbc,
+ 0xbf, 0x76, 0x95, 0x85, 0x13, 0x1d, 0x11, 0x3b,
+ },
+ },
+ {
+ .data_len = 3333,
+ .digest = {
+ 0x3a, 0xaa, 0x85, 0xa0, 0x8c, 0x8e, 0xe1, 0x9c,
+ 0x9b, 0x43, 0x72, 0x7f, 0x40, 0x88, 0x3b, 0xd1,
+ 0xc4, 0xd8, 0x2b, 0x69, 0xa6, 0x74, 0x47, 0x69,
+ 0x5f, 0x7d, 0xab, 0x75, 0xa9, 0xf9, 0x88, 0x54,
+ 0xce, 0x57, 0xcc, 0x9d, 0xac, 0x13, 0x91, 0xdb,
+ 0x6d, 0x5c, 0xd8, 0xf4, 0x35, 0xc9, 0x30, 0xf0,
+ 0x4b, 0x91, 0x25, 0xab, 0x92, 0xa8, 0xc8, 0x6f,
+ 0xa0, 0xeb, 0x71, 0x56, 0x95, 0xab, 0xfd, 0xd7,
+ },
+ },
+ {
+ .data_len = 4096,
+ .digest = {
+ 0xe1, 0xe9, 0xbe, 0x6c, 0x96, 0xe2, 0xe8, 0xa6,
+ 0x53, 0xcd, 0x79, 0x77, 0x57, 0x51, 0x2f, 0xb2,
+ 0x9f, 0xfc, 0x09, 0xaa, 0x2c, 0xbc, 0x6c, 0x5f,
+ 0xb0, 0xf2, 0x12, 0x39, 0x54, 0xd7, 0x27, 0xf8,
+ 0x33, 0x5d, 0xd4, 0x8a, 0xca, 0xd8, 0x2e, 0xbb,
+ 0x02, 0x82, 0xca, 0x1b, 0x54, 0xfa, 0xd6, 0xf4,
+ 0x49, 0x63, 0xfc, 0xc8, 0x73, 0xd4, 0x26, 0x8d,
+ 0x4f, 0x1c, 0x56, 0xa7, 0xf4, 0x58, 0x6f, 0x51,
+ },
+ },
+ {
+ .data_len = 4128,
+ .digest = {
+ 0xf2, 0xf6, 0xe1, 0x16, 0x98, 0x69, 0x74, 0x5f,
+ 0x6c, 0xc4, 0x9d, 0x34, 0xa2, 0x84, 0x5d, 0x47,
+ 0xac, 0x39, 0xe0, 0x14, 0x2d, 0x78, 0xfa, 0x27,
+ 0xd5, 0x18, 0xaf, 0x26, 0x89, 0xa4, 0x69, 0xd3,
+ 0x56, 0xde, 0xfe, 0x4b, 0x9f, 0x0c, 0x9d, 0x5a,
+ 0x9a, 0x73, 0x3e, 0x3c, 0x76, 0x4b, 0x96, 0xca,
+ 0x49, 0xda, 0x05, 0x8c, 0x53, 0xbb, 0x85, 0x89,
+ 0x60, 0xc7, 0xe0, 0xb3, 0x51, 0x18, 0xd2, 0xd2,
+ },
+ },
+ {
+ .data_len = 4160,
+ .digest = {
+ 0xfc, 0x5c, 0xcf, 0xbf, 0x29, 0xe3, 0x01, 0xef,
+ 0x4b, 0x40, 0x70, 0x01, 0xca, 0x4d, 0x46, 0xce,
+ 0xa9, 0x95, 0x5d, 0xb4, 0xf1, 0x79, 0x29, 0xdb,
+ 0xac, 0x32, 0x3d, 0xd9, 0x60, 0x9e, 0x6b, 0xb8,
+ 0x28, 0x62, 0xb7, 0x4a, 0xbb, 0x33, 0xb9, 0xd0,
+ 0x83, 0xe0, 0xd7, 0x5a, 0x2d, 0x01, 0x4c, 0x61,
+ 0x9e, 0x7d, 0x2d, 0x2d, 0x60, 0x29, 0x5e, 0x60,
+ 0x10, 0xb7, 0x41, 0x00, 0x3f, 0xe5, 0xf7, 0x52,
+ },
+ },
+ {
+ .data_len = 4224,
+ .digest = {
+ 0xf8, 0xe5, 0x4b, 0xe5, 0x89, 0xf9, 0x1b, 0x43,
+ 0xbb, 0x65, 0x3d, 0xa0, 0xb4, 0xdc, 0x04, 0x26,
+ 0x68, 0x15, 0xae, 0x4d, 0xd6, 0x03, 0xb7, 0x27,
+ 0x06, 0x8c, 0x2a, 0x82, 0x51, 0x96, 0xbf, 0x83,
+ 0x38, 0x96, 0x21, 0x8a, 0xd9, 0xf9, 0x4e, 0x38,
+ 0xc6, 0xb3, 0xbd, 0xfe, 0xd3, 0x49, 0x90, 0xbc,
+ 0xa1, 0x77, 0xd0, 0xa0, 0x3c, 0x2b, 0x4e, 0x10,
+ 0x34, 0xc3, 0x17, 0x85, 0x3d, 0xec, 0xa8, 0x05,
+ },
+ },
+ {
+ .data_len = 16384,
+ .digest = {
+ 0x38, 0x56, 0xaf, 0x83, 0x68, 0x9c, 0xba, 0xe3,
+ 0xec, 0x51, 0xf5, 0xf4, 0x93, 0x48, 0x1d, 0xe6,
+ 0xad, 0xa8, 0x8c, 0x70, 0x2a, 0xd9, 0xaa, 0x43,
+ 0x04, 0x40, 0x95, 0xc1, 0xe6, 0x8a, 0xf5, 0x01,
+ 0x6b, 0x79, 0xd9, 0xb4, 0xd0, 0x1d, 0x93, 0x26,
+ 0xfe, 0xf5, 0x07, 0x57, 0xda, 0x08, 0x0a, 0x82,
+ 0xc9, 0x17, 0x13, 0x5b, 0x9e, 0x11, 0x96, 0xa5,
+ 0xd0, 0x92, 0xcd, 0xf1, 0xa3, 0x5b, 0x43, 0x21,
+ },
+ },
+};
+
+static const u8 hash_testvec_consolidated[BLAKE2B_HASH_SIZE] = {
+ 0xa4, 0xf8, 0xf6, 0xa1, 0x36, 0x89, 0xc0, 0x2a,
+ 0xc3, 0x42, 0x32, 0x71, 0xe5, 0xea, 0x14, 0x77,
+ 0xf3, 0x99, 0x91, 0x87, 0x49, 0xc2, 0x8d, 0xa5,
+ 0x2f, 0xed, 0x01, 0x35, 0x39, 0x64, 0x09, 0x25,
+ 0xe3, 0xa8, 0x50, 0x97, 0x35, 0x8b, 0xf5, 0x19,
+ 0x1e, 0xd5, 0x9f, 0x03, 0x0b, 0x65, 0x55, 0x0e,
+ 0xa0, 0xb7, 0xda, 0x18, 0x7b, 0x7f, 0x88, 0x55,
+ 0x1f, 0xdb, 0x82, 0x6b, 0x98, 0x90, 0x1c, 0xdd,
+};
+
+static const u8 blake2b_keyed_testvec_consolidated[BLAKE2B_HASH_SIZE] = {
+ 0x2b, 0x89, 0x36, 0x3a, 0x36, 0xe4, 0x18, 0x38,
+ 0xc4, 0x5b, 0x5c, 0xa5, 0x9a, 0xed, 0xf2, 0xee,
+ 0x5a, 0xb6, 0x82, 0x6c, 0x63, 0xf2, 0x29, 0x57,
+ 0xc7, 0xd5, 0x32, 0x27, 0xba, 0x88, 0xb1, 0xab,
+ 0xf2, 0x2a, 0xc1, 0xea, 0xf3, 0x91, 0x89, 0x66,
+ 0x47, 0x1e, 0x5b, 0xc6, 0x98, 0x12, 0xe9, 0x25,
+ 0xbf, 0x72, 0xd2, 0x3f, 0x88, 0x97, 0x17, 0x51,
+ 0xed, 0x96, 0xfb, 0xe9, 0xca, 0x52, 0x42, 0xc9,
+};
diff --git a/lib/crypto/tests/blake2b_kunit.c b/lib/crypto/tests/blake2b_kunit.c
new file mode 100644
index 0000000000000..bc0be7da1e76d
--- /dev/null
+++ b/lib/crypto/tests/blake2b_kunit.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2025 Google LLC
+ */
+#include <crypto/blake2b.h>
+#include "blake2b-testvecs.h"
+
+/*
+ * The following are compatibility functions that present BLAKE2b as an unkeyed
+ * hash function that produces hashes of fixed length BLAKE2B_HASH_SIZE, so that
+ * hash-test-template.h can be reused to test it.
+ */
+
+static void blake2b_default(const u8 *data, size_t len,
+ u8 out[BLAKE2B_HASH_SIZE])
+{
+ blake2b(NULL, 0, data, len, out, BLAKE2B_HASH_SIZE);
+}
+
+static void blake2b_init_default(struct blake2b_ctx *ctx)
+{
+ blake2b_init(ctx, BLAKE2B_HASH_SIZE);
+}
+
+/*
+ * Generate the HASH_KUNIT_CASES using hash-test-template.h. These test BLAKE2b
+ * with a key length of 0 and a hash length of BLAKE2B_HASH_SIZE.
+ */
+#define HASH blake2b_default
+#define HASH_CTX blake2b_ctx
+#define HASH_SIZE BLAKE2B_HASH_SIZE
+#define HASH_INIT blake2b_init_default
+#define HASH_UPDATE blake2b_update
+#define HASH_FINAL blake2b_final
+#include "hash-test-template.h"
+
+/*
+ * BLAKE2b specific test case which tests all possible combinations of key
+ * length and hash length.
+ */
+static void test_blake2b_all_key_and_hash_lens(struct kunit *test)
+{
+ const size_t data_len = 100;
+ u8 *data = &test_buf[0];
+ u8 *key = data + data_len;
+ u8 *hash = key + BLAKE2B_KEY_SIZE;
+ struct blake2b_ctx main_ctx;
+ u8 main_hash[BLAKE2B_HASH_SIZE];
+
+ rand_bytes_seeded_from_len(data, data_len);
+ blake2b_init(&main_ctx, BLAKE2B_HASH_SIZE);
+ for (int key_len = 0; key_len <= BLAKE2B_KEY_SIZE; key_len++) {
+ rand_bytes_seeded_from_len(key, key_len);
+ for (int out_len = 1; out_len <= BLAKE2B_HASH_SIZE; out_len++) {
+ blake2b(key, key_len, data, data_len, hash, out_len);
+ blake2b_update(&main_ctx, hash, out_len);
+ }
+ }
+ blake2b_final(&main_ctx, main_hash);
+ KUNIT_ASSERT_MEMEQ(test, main_hash, blake2b_keyed_testvec_consolidated,
+ BLAKE2B_HASH_SIZE);
+}
+
+/*
+ * BLAKE2b specific test case which tests using a guarded buffer for all allowed
+ * key lengths. Also tests both blake2b() and blake2b_init_key().
+ */
+static void test_blake2b_with_guarded_key_buf(struct kunit *test)
+{
+ const size_t data_len = 100;
+
+ rand_bytes(test_buf, data_len);
+ for (int key_len = 0; key_len <= BLAKE2B_KEY_SIZE; key_len++) {
+ u8 key[BLAKE2B_KEY_SIZE];
+ u8 *guarded_key = &test_buf[TEST_BUF_LEN - key_len];
+ u8 hash1[BLAKE2B_HASH_SIZE];
+ u8 hash2[BLAKE2B_HASH_SIZE];
+ struct blake2b_ctx ctx;
+
+ rand_bytes(key, key_len);
+ memcpy(guarded_key, key, key_len);
+
+ blake2b(key, key_len, test_buf, data_len,
+ hash1, BLAKE2B_HASH_SIZE);
+ blake2b(guarded_key, key_len, test_buf, data_len,
+ hash2, BLAKE2B_HASH_SIZE);
+ KUNIT_ASSERT_MEMEQ(test, hash1, hash2, BLAKE2B_HASH_SIZE);
+
+ blake2b_init_key(&ctx, BLAKE2B_HASH_SIZE, guarded_key, key_len);
+ blake2b_update(&ctx, test_buf, data_len);
+ blake2b_final(&ctx, hash2);
+ KUNIT_ASSERT_MEMEQ(test, hash1, hash2, BLAKE2B_HASH_SIZE);
+ }
+}
+
+/*
+ * BLAKE2b specific test case which tests using a guarded output buffer for all
+ * allowed output lengths.
+ */
+static void test_blake2b_with_guarded_out_buf(struct kunit *test)
+{
+ const size_t data_len = 100;
+
+ rand_bytes(test_buf, data_len);
+ for (int out_len = 1; out_len <= BLAKE2B_HASH_SIZE; out_len++) {
+ u8 hash[BLAKE2B_HASH_SIZE];
+ u8 *guarded_hash = &test_buf[TEST_BUF_LEN - out_len];
+
+ blake2b(NULL, 0, test_buf, data_len, hash, out_len);
+ blake2b(NULL, 0, test_buf, data_len, guarded_hash, out_len);
+ KUNIT_ASSERT_MEMEQ(test, hash, guarded_hash, out_len);
+ }
+}
+
+static struct kunit_case blake2b_test_cases[] = {
+ HASH_KUNIT_CASES,
+ KUNIT_CASE(test_blake2b_all_key_and_hash_lens),
+ KUNIT_CASE(test_blake2b_with_guarded_key_buf),
+ KUNIT_CASE(test_blake2b_with_guarded_out_buf),
+ KUNIT_CASE(benchmark_hash),
+ {},
+};
+
+static struct kunit_suite blake2b_test_suite = {
+ .name = "blake2b",
+ .test_cases = blake2b_test_cases,
+ .suite_init = hash_suite_init,
+ .suite_exit = hash_suite_exit,
+};
+kunit_test_suite(blake2b_test_suite);
+
+MODULE_DESCRIPTION("KUnit tests and benchmark for BLAKE2b");
+MODULE_LICENSE("GPL");
diff --git a/scripts/crypto/gen-hash-testvecs.py b/scripts/crypto/gen-hash-testvecs.py
index fc063f2ee95f1..c5b7985fe7280 100755
--- a/scripts/crypto/gen-hash-testvecs.py
+++ b/scripts/crypto/gen-hash-testvecs.py
@@ -83,12 +83,12 @@ def print_c_struct_u8_array_field(name, value):
print(f'\t\t.{name} = {{')
print_bytes('\t\t\t', value, 8)
print('\t\t},')
def alg_digest_size_const(alg):
- if alg == 'blake2s':
- return 'BLAKE2S_HASH_SIZE'
+ if alg.startswith('blake2'):
+ return f'{alg.upper()}_HASH_SIZE'
return f'{alg.upper()}_DIGEST_SIZE'
def gen_unkeyed_testvecs(alg):
print('')
print('static const struct {')
@@ -122,23 +122,26 @@ def gen_hmac_testvecs(alg):
ctx.update(mac)
print_static_u8_array_definition(
f'hmac_testvec_consolidated[{alg.upper()}_DIGEST_SIZE]',
ctx.digest())
-BLAKE2S_KEY_SIZE = 32
-BLAKE2S_HASH_SIZE = 32
-
-def gen_additional_blake2s_testvecs():
+def gen_additional_blake2_testvecs(alg):
+ if alg == 'blake2s':
+ (max_key_size, max_hash_size) = (32, 32)
+ elif alg == 'blake2b':
+ (max_key_size, max_hash_size) = (64, 64)
+ else:
+ raise ValueError(f'Unsupported alg: {alg}')
hashes = b''
- for key_len in range(BLAKE2S_KEY_SIZE + 1):
- for out_len in range(1, BLAKE2S_HASH_SIZE + 1):
- h = hashlib.blake2s(digest_size=out_len, key=rand_bytes(key_len))
+ for key_len in range(max_key_size + 1):
+ for out_len in range(1, max_hash_size + 1):
+ h = hashlib.new(alg, digest_size=out_len, key=rand_bytes(key_len))
h.update(rand_bytes(100))
hashes += h.digest()
print_static_u8_array_definition(
- 'blake2s_keyed_testvec_consolidated[BLAKE2S_HASH_SIZE]',
- compute_hash('blake2s', hashes))
+ f'{alg}_keyed_testvec_consolidated[{alg_digest_size_const(alg)}]',
+ compute_hash(alg, hashes))
def gen_additional_poly1305_testvecs():
key = b'\xff' * POLY1305_KEY_SIZE
data = b''
ctx = Poly1305(key)
@@ -158,11 +161,11 @@ if len(sys.argv) != 2:
alg = sys.argv[1]
print('/* SPDX-License-Identifier: GPL-2.0-or-later */')
print(f'/* This file was generated by: {sys.argv[0]} {" ".join(sys.argv[1:])} */')
gen_unkeyed_testvecs(alg)
-if alg == 'blake2s':
- gen_additional_blake2s_testvecs()
+if alg.startswith('blake2'):
+ gen_additional_blake2_testvecs(alg)
elif alg == 'poly1305':
gen_additional_poly1305_testvecs()
else:
gen_hmac_testvecs(alg)
--
2.51.1.dirty
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH 09/10] crypto: blake2b - Reimplement using library API
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
` (7 preceding siblings ...)
2025-10-18 4:31 ` [PATCH 08/10] lib/crypto: tests: Add KUnit tests for BLAKE2b Eric Biggers
@ 2025-10-18 4:31 ` Eric Biggers
2025-10-18 4:31 ` [PATCH 10/10] btrfs: switch to library APIs for checksums Eric Biggers
` (2 subsequent siblings)
11 siblings, 0 replies; 20+ messages in thread
From: Eric Biggers @ 2025-10-18 4:31 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld, Eric Biggers
Replace blake2b_generic.c with a new file blake2b.c which implements the
BLAKE2b crypto_shash algorithms on top of the BLAKE2b library API.
Change the driver name suffix from "-generic" to "-lib" to reflect that
these algorithms now just use the (possibly arch-optimized) library.
This closely mirrors crypto/{md5,sha1,sha256,sha512}.c.
Remove include/crypto/internal/blake2b.h since it is no longer used.
Likewise, remove struct blake2b_state from include/crypto/blake2b.h.
Omit support for import_core and export_core, since there are no legacy
drivers that need these for these algorithms.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
crypto/Kconfig | 1 +
crypto/Makefile | 3 +-
crypto/blake2b.c | 111 +++++++++++++++++
crypto/blake2b_generic.c | 192 ------------------------------
crypto/testmgr.c | 4 +
include/crypto/blake2b.h | 10 --
include/crypto/internal/blake2b.h | 116 ------------------
7 files changed, 117 insertions(+), 320 deletions(-)
create mode 100644 crypto/blake2b.c
delete mode 100644 crypto/blake2b_generic.c
delete mode 100644 include/crypto/internal/blake2b.h
diff --git a/crypto/Kconfig b/crypto/Kconfig
index a04595f9d0ca4..0a7e74ac870b0 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -879,10 +879,11 @@ endmenu
menu "Hashes, digests, and MACs"
config CRYPTO_BLAKE2B
tristate "BLAKE2b"
select CRYPTO_HASH
+ select CRYPTO_LIB_BLAKE2B
help
BLAKE2b cryptographic hash function (RFC 7693)
BLAKE2b is optimized for 64-bit platforms and can produce digests
of any size between 1 and 64 bytes. The keyed hash is also implemented.
diff --git a/crypto/Makefile b/crypto/Makefile
index e430e6e99b6a2..5b02ca2cb04e0 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -81,12 +81,11 @@ obj-$(CONFIG_CRYPTO_SHA512) += sha512.o
obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o
obj-$(CONFIG_CRYPTO_SM3_GENERIC) += sm3_generic.o
obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
-obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
-CFLAGS_blake2b_generic.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930
+obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b.o
obj-$(CONFIG_CRYPTO_ECB) += ecb.o
obj-$(CONFIG_CRYPTO_CBC) += cbc.o
obj-$(CONFIG_CRYPTO_PCBC) += pcbc.o
obj-$(CONFIG_CRYPTO_CTS) += cts.o
obj-$(CONFIG_CRYPTO_LRW) += lrw.o
diff --git a/crypto/blake2b.c b/crypto/blake2b.c
new file mode 100644
index 0000000000000..67a6dae43a54b
--- /dev/null
+++ b/crypto/blake2b.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Crypto API support for BLAKE2b
+ *
+ * Copyright 2025 Google LLC
+ */
+#include <crypto/blake2b.h>
+#include <crypto/internal/hash.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+struct blake2b_tfm_ctx {
+ unsigned int keylen;
+ u8 key[BLAKE2B_KEY_SIZE];
+};
+
+static int crypto_blake2b_setkey(struct crypto_shash *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+ if (keylen > BLAKE2B_KEY_SIZE)
+ return -EINVAL;
+ memcpy(tctx->key, key, keylen);
+ tctx->keylen = keylen;
+ return 0;
+}
+
+#define BLAKE2B_CTX(desc) ((struct blake2b_ctx *)shash_desc_ctx(desc))
+
+static int crypto_blake2b_init(struct shash_desc *desc)
+{
+ const struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ unsigned int digestsize = crypto_shash_digestsize(desc->tfm);
+
+ blake2b_init_key(BLAKE2B_CTX(desc), digestsize,
+ tctx->key, tctx->keylen);
+ return 0;
+}
+
+static int crypto_blake2b_update(struct shash_desc *desc,
+ const u8 *data, unsigned int len)
+{
+ blake2b_update(BLAKE2B_CTX(desc), data, len);
+ return 0;
+}
+
+static int crypto_blake2b_final(struct shash_desc *desc, u8 *out)
+{
+ blake2b_final(BLAKE2B_CTX(desc), out);
+ return 0;
+}
+
+static int crypto_blake2b_digest(struct shash_desc *desc,
+ const u8 *data, unsigned int len, u8 *out)
+{
+ const struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ unsigned int digestsize = crypto_shash_digestsize(desc->tfm);
+
+ blake2b(tctx->key, tctx->keylen, data, len, out, digestsize);
+ return 0;
+}
+
+#define BLAKE2B_ALG(name, digest_size) \
+ { \
+ .base.cra_name = name, \
+ .base.cra_driver_name = name "-lib", \
+ .base.cra_priority = 300, \
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
+ .base.cra_blocksize = BLAKE2B_BLOCK_SIZE, \
+ .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), \
+ .base.cra_module = THIS_MODULE, \
+ .digestsize = digest_size, \
+ .setkey = crypto_blake2b_setkey, \
+ .init = crypto_blake2b_init, \
+ .update = crypto_blake2b_update, \
+ .final = crypto_blake2b_final, \
+ .digest = crypto_blake2b_digest, \
+ .descsize = sizeof(struct blake2b_ctx), \
+ }
+
+static struct shash_alg algs[] = {
+ BLAKE2B_ALG("blake2b-160", BLAKE2B_160_HASH_SIZE),
+ BLAKE2B_ALG("blake2b-256", BLAKE2B_256_HASH_SIZE),
+ BLAKE2B_ALG("blake2b-384", BLAKE2B_384_HASH_SIZE),
+ BLAKE2B_ALG("blake2b-512", BLAKE2B_512_HASH_SIZE),
+};
+
+static int __init crypto_blake2b_mod_init(void)
+{
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+module_init(crypto_blake2b_mod_init);
+
+static void __exit crypto_blake2b_mod_exit(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+module_exit(crypto_blake2b_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Crypto API support for BLAKE2b");
+
+MODULE_ALIAS_CRYPTO("blake2b-160");
+MODULE_ALIAS_CRYPTO("blake2b-160-lib");
+MODULE_ALIAS_CRYPTO("blake2b-256");
+MODULE_ALIAS_CRYPTO("blake2b-256-lib");
+MODULE_ALIAS_CRYPTO("blake2b-384");
+MODULE_ALIAS_CRYPTO("blake2b-384-lib");
+MODULE_ALIAS_CRYPTO("blake2b-512");
+MODULE_ALIAS_CRYPTO("blake2b-512-lib");
diff --git a/crypto/blake2b_generic.c b/crypto/blake2b_generic.c
deleted file mode 100644
index 60f0562175104..0000000000000
--- a/crypto/blake2b_generic.c
+++ /dev/null
@@ -1,192 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0-only OR Apache-2.0)
-/*
- * Generic implementation of the BLAKE2b digest algorithm. Based on the BLAKE2b
- * reference implementation, but it has been heavily modified for use in the
- * kernel. The reference implementation was:
- *
- * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under
- * the terms of the CC0, the OpenSSL Licence, or the Apache Public License
- * 2.0, at your option. The terms of these licenses can be found at:
- *
- * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- * - OpenSSL license : https://www.openssl.org/source/license.html
- * - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0
- *
- * More information about BLAKE2 can be found at https://blake2.net.
- */
-
-#include <crypto/internal/blake2b.h>
-#include <crypto/internal/hash.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/unaligned.h>
-
-static const u8 blake2b_sigma[12][16] = {
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
- { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
- { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
- { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
- { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
- { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
- { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
- { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
- { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
- { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
- { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
- { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
-};
-
-static void blake2b_increment_counter(struct blake2b_state *S, const u64 inc)
-{
- S->t[0] += inc;
- S->t[1] += (S->t[0] < inc);
-}
-
-#define G(r,i,a,b,c,d) \
- do { \
- a = a + b + m[blake2b_sigma[r][2*i+0]]; \
- d = ror64(d ^ a, 32); \
- c = c + d; \
- b = ror64(b ^ c, 24); \
- a = a + b + m[blake2b_sigma[r][2*i+1]]; \
- d = ror64(d ^ a, 16); \
- c = c + d; \
- b = ror64(b ^ c, 63); \
- } while (0)
-
-#define ROUND(r) \
- do { \
- G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
- G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
- G(r,2,v[ 2],v[ 6],v[10],v[14]); \
- G(r,3,v[ 3],v[ 7],v[11],v[15]); \
- G(r,4,v[ 0],v[ 5],v[10],v[15]); \
- G(r,5,v[ 1],v[ 6],v[11],v[12]); \
- G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
- G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
- } while (0)
-
-static void blake2b_compress_one_generic(struct blake2b_state *S,
- const u8 block[BLAKE2B_BLOCK_SIZE])
-{
- u64 m[16];
- u64 v[16];
- size_t i;
-
- for (i = 0; i < 16; ++i)
- m[i] = get_unaligned_le64(block + i * sizeof(m[i]));
-
- for (i = 0; i < 8; ++i)
- v[i] = S->h[i];
-
- v[ 8] = BLAKE2B_IV0;
- v[ 9] = BLAKE2B_IV1;
- v[10] = BLAKE2B_IV2;
- v[11] = BLAKE2B_IV3;
- v[12] = BLAKE2B_IV4 ^ S->t[0];
- v[13] = BLAKE2B_IV5 ^ S->t[1];
- v[14] = BLAKE2B_IV6 ^ S->f[0];
- v[15] = BLAKE2B_IV7 ^ S->f[1];
-
- ROUND(0);
- ROUND(1);
- ROUND(2);
- ROUND(3);
- ROUND(4);
- ROUND(5);
- ROUND(6);
- ROUND(7);
- ROUND(8);
- ROUND(9);
- ROUND(10);
- ROUND(11);
-#ifdef CONFIG_CC_IS_CLANG
-#pragma nounroll /* https://llvm.org/pr45803 */
-#endif
- for (i = 0; i < 8; ++i)
- S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
-}
-
-#undef G
-#undef ROUND
-
-static void blake2b_compress_generic(struct blake2b_state *state,
- const u8 *block, size_t nblocks, u32 inc)
-{
- do {
- blake2b_increment_counter(state, inc);
- blake2b_compress_one_generic(state, block);
- block += BLAKE2B_BLOCK_SIZE;
- } while (--nblocks);
-}
-
-static int crypto_blake2b_update_generic(struct shash_desc *desc,
- const u8 *in, unsigned int inlen)
-{
- return crypto_blake2b_update_bo(desc, in, inlen,
- blake2b_compress_generic);
-}
-
-static int crypto_blake2b_finup_generic(struct shash_desc *desc, const u8 *in,
- unsigned int inlen, u8 *out)
-{
- return crypto_blake2b_finup(desc, in, inlen, out,
- blake2b_compress_generic);
-}
-
-#define BLAKE2B_ALG(name, driver_name, digest_size) \
- { \
- .base.cra_name = name, \
- .base.cra_driver_name = driver_name, \
- .base.cra_priority = 100, \
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY | \
- CRYPTO_AHASH_ALG_BLOCK_ONLY | \
- CRYPTO_AHASH_ALG_FINAL_NONZERO, \
- .base.cra_blocksize = BLAKE2B_BLOCK_SIZE, \
- .base.cra_ctxsize = sizeof(struct blake2b_tfm_ctx), \
- .base.cra_module = THIS_MODULE, \
- .digestsize = digest_size, \
- .setkey = crypto_blake2b_setkey, \
- .init = crypto_blake2b_init, \
- .update = crypto_blake2b_update_generic, \
- .finup = crypto_blake2b_finup_generic, \
- .descsize = BLAKE2B_DESC_SIZE, \
- .statesize = BLAKE2B_STATE_SIZE, \
- }
-
-static struct shash_alg blake2b_algs[] = {
- BLAKE2B_ALG("blake2b-160", "blake2b-160-generic",
- BLAKE2B_160_HASH_SIZE),
- BLAKE2B_ALG("blake2b-256", "blake2b-256-generic",
- BLAKE2B_256_HASH_SIZE),
- BLAKE2B_ALG("blake2b-384", "blake2b-384-generic",
- BLAKE2B_384_HASH_SIZE),
- BLAKE2B_ALG("blake2b-512", "blake2b-512-generic",
- BLAKE2B_512_HASH_SIZE),
-};
-
-static int __init blake2b_mod_init(void)
-{
- return crypto_register_shashes(blake2b_algs, ARRAY_SIZE(blake2b_algs));
-}
-
-static void __exit blake2b_mod_fini(void)
-{
- crypto_unregister_shashes(blake2b_algs, ARRAY_SIZE(blake2b_algs));
-}
-
-module_init(blake2b_mod_init);
-module_exit(blake2b_mod_fini);
-
-MODULE_AUTHOR("David Sterba <kdave@kernel.org>");
-MODULE_DESCRIPTION("BLAKE2b generic implementation");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CRYPTO("blake2b-160");
-MODULE_ALIAS_CRYPTO("blake2b-160-generic");
-MODULE_ALIAS_CRYPTO("blake2b-256");
-MODULE_ALIAS_CRYPTO("blake2b-256-generic");
-MODULE_ALIAS_CRYPTO("blake2b-384");
-MODULE_ALIAS_CRYPTO("blake2b-384-generic");
-MODULE_ALIAS_CRYPTO("blake2b-512");
-MODULE_ALIAS_CRYPTO("blake2b-512-generic");
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 6a490aaa71b9a..3ab7adc1cdce5 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4330,31 +4330,35 @@ static const struct alg_test_desc alg_test_descs[] = {
.alg = "authenc(hmac(sha512),rfc3686(ctr(aes)))",
.test = alg_test_null,
.fips_allowed = 1,
}, {
.alg = "blake2b-160",
+ .generic_driver = "blake2b-160-lib",
.test = alg_test_hash,
.fips_allowed = 0,
.suite = {
.hash = __VECS(blake2b_160_tv_template)
}
}, {
.alg = "blake2b-256",
+ .generic_driver = "blake2b-256-lib",
.test = alg_test_hash,
.fips_allowed = 0,
.suite = {
.hash = __VECS(blake2b_256_tv_template)
}
}, {
.alg = "blake2b-384",
+ .generic_driver = "blake2b-384-lib",
.test = alg_test_hash,
.fips_allowed = 0,
.suite = {
.hash = __VECS(blake2b_384_tv_template)
}
}, {
.alg = "blake2b-512",
+ .generic_driver = "blake2b-512-lib",
.test = alg_test_hash,
.fips_allowed = 0,
.suite = {
.hash = __VECS(blake2b_512_tv_template)
}
diff --git a/include/crypto/blake2b.h b/include/crypto/blake2b.h
index 4879e2ec26867..3bc37fd103a7a 100644
--- a/include/crypto/blake2b.h
+++ b/include/crypto/blake2b.h
@@ -5,24 +5,14 @@
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/string.h>
-struct blake2b_state {
- /* 'h', 't', and 'f' are used in assembly code, so keep them as-is. */
- u64 h[8];
- u64 t[2];
- /* The true state ends here. The rest is temporary storage. */
- u64 f[2];
-};
-
enum blake2b_lengths {
BLAKE2B_BLOCK_SIZE = 128,
BLAKE2B_HASH_SIZE = 64,
BLAKE2B_KEY_SIZE = 64,
- BLAKE2B_STATE_SIZE = offsetof(struct blake2b_state, f),
- BLAKE2B_DESC_SIZE = sizeof(struct blake2b_state),
BLAKE2B_160_HASH_SIZE = 20,
BLAKE2B_256_HASH_SIZE = 32,
BLAKE2B_384_HASH_SIZE = 48,
BLAKE2B_512_HASH_SIZE = 64,
diff --git a/include/crypto/internal/blake2b.h b/include/crypto/internal/blake2b.h
deleted file mode 100644
index 3712df69def18..0000000000000
--- a/include/crypto/internal/blake2b.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-/*
- * Helper functions for BLAKE2b implementations.
- * Keep this in sync with the corresponding BLAKE2s header.
- */
-
-#ifndef _CRYPTO_INTERNAL_BLAKE2B_H
-#define _CRYPTO_INTERNAL_BLAKE2B_H
-
-#include <asm/byteorder.h>
-#include <crypto/blake2b.h>
-#include <crypto/internal/hash.h>
-#include <linux/array_size.h>
-#include <linux/compiler.h>
-#include <linux/build_bug.h>
-#include <linux/errno.h>
-#include <linux/math.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-static inline void blake2b_set_lastblock(struct blake2b_state *state)
-{
- state->f[0] = -1;
- state->f[1] = 0;
-}
-
-static inline void blake2b_set_nonlast(struct blake2b_state *state)
-{
- state->f[0] = 0;
- state->f[1] = 0;
-}
-
-typedef void (*blake2b_compress_t)(struct blake2b_state *state,
- const u8 *block, size_t nblocks, u32 inc);
-
-/* Helper functions for shash implementations of BLAKE2b */
-
-struct blake2b_tfm_ctx {
- u8 key[BLAKE2B_BLOCK_SIZE];
- unsigned int keylen;
-};
-
-static inline int crypto_blake2b_setkey(struct crypto_shash *tfm,
- const u8 *key, unsigned int keylen)
-{
- struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(tfm);
-
- if (keylen > BLAKE2B_KEY_SIZE)
- return -EINVAL;
-
- BUILD_BUG_ON(BLAKE2B_KEY_SIZE > BLAKE2B_BLOCK_SIZE);
-
- memcpy(tctx->key, key, keylen);
- memset(tctx->key + keylen, 0, BLAKE2B_BLOCK_SIZE - keylen);
- tctx->keylen = keylen;
-
- return 0;
-}
-
-static inline void __crypto_blake2b_init(struct blake2b_state *state,
- size_t outlen, size_t keylen)
-{
- state->h[0] = BLAKE2B_IV0 ^ (0x01010000 | keylen << 8 | outlen);
- state->h[1] = BLAKE2B_IV1;
- state->h[2] = BLAKE2B_IV2;
- state->h[3] = BLAKE2B_IV3;
- state->h[4] = BLAKE2B_IV4;
- state->h[5] = BLAKE2B_IV5;
- state->h[6] = BLAKE2B_IV6;
- state->h[7] = BLAKE2B_IV7;
- state->t[0] = 0;
- state->t[1] = 0;
-}
-
-static inline int crypto_blake2b_init(struct shash_desc *desc)
-{
- const struct blake2b_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
- struct blake2b_state *state = shash_desc_ctx(desc);
- unsigned int outlen = crypto_shash_digestsize(desc->tfm);
-
- __crypto_blake2b_init(state, outlen, tctx->keylen);
- return tctx->keylen ?
- crypto_shash_update(desc, tctx->key, BLAKE2B_BLOCK_SIZE) : 0;
-}
-
-static inline int crypto_blake2b_update_bo(struct shash_desc *desc,
- const u8 *in, unsigned int inlen,
- blake2b_compress_t compress)
-{
- struct blake2b_state *state = shash_desc_ctx(desc);
-
- blake2b_set_nonlast(state);
- compress(state, in, inlen / BLAKE2B_BLOCK_SIZE, BLAKE2B_BLOCK_SIZE);
- return inlen - round_down(inlen, BLAKE2B_BLOCK_SIZE);
-}
-
-static inline int crypto_blake2b_finup(struct shash_desc *desc, const u8 *in,
- unsigned int inlen, u8 *out,
- blake2b_compress_t compress)
-{
- struct blake2b_state *state = shash_desc_ctx(desc);
- u8 buf[BLAKE2B_BLOCK_SIZE];
- int i;
-
- memcpy(buf, in, inlen);
- memset(buf + inlen, 0, BLAKE2B_BLOCK_SIZE - inlen);
- blake2b_set_lastblock(state);
- compress(state, buf, 1, inlen);
- for (i = 0; i < ARRAY_SIZE(state->h); i++)
- __cpu_to_le64s(&state->h[i]);
- memcpy(out, state->h, crypto_shash_digestsize(desc->tfm));
- memzero_explicit(buf, sizeof(buf));
- return 0;
-}
-
-#endif /* _CRYPTO_INTERNAL_BLAKE2B_H */
--
2.51.1.dirty
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH 10/10] btrfs: switch to library APIs for checksums
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
` (8 preceding siblings ...)
2025-10-18 4:31 ` [PATCH 09/10] crypto: blake2b - Reimplement using library API Eric Biggers
@ 2025-10-18 4:31 ` Eric Biggers
2025-10-22 7:11 ` David Sterba
2025-10-22 10:06 ` [PATCH 00/10] BLAKE2b library API Ard Biesheuvel
2025-10-24 19:21 ` Eric Biggers
11 siblings, 1 reply; 20+ messages in thread
From: Eric Biggers @ 2025-10-18 4:31 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld, Eric Biggers
Make btrfs use the library APIs instead of crypto_shash, for all
checksum computations. This has many benefits:
- Allows future checksum types, e.g. XXH3 or CRC64, to be more easily
supported. Only a library API will be needed, not crypto_shash too.
- Eliminates the overhead of the generic crypto layer, including an
indirect call for every function call and other API overhead. A
microbenchmark of btrfs_check_read_bio() with crc32c checksums shows a
speedup from 658 cycles to 608 cycles per 4096-byte block.
- Decreases the stack usage of btrfs by reducing the size of checksum
contexts from 384 bytes to 240 bytes, and by eliminating the need for
some functions to declare a checksum context at all.
- Increases reliability. The library functions always succeed and
return void. In contrast, crypto_shash can fail and return errors.
Also, the library functions are guaranteed to be available when btrfs
is loaded; there's no longer any need to use module softdeps to try to
work around the crypto modules sometimes not being loaded.
- Fixes a bug where blake2b checksums didn't work on kernels booted with
fips=1. Since btrfs checksums are for integrity only, it's fine for
them to use non-FIPS-approved algorithms.
Note that with having to handle 4 algorithms instead of just 1-2, this
commit does result in a slightly positive diffstat. That being said,
this wouldn't have been the case if btrfs had actually checked for
errors from crypto_shash, which technically it should have been doing.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
crypto/Kconfig | 2 -
fs/btrfs/Kconfig | 8 ++--
fs/btrfs/compression.c | 1 -
fs/btrfs/disk-io.c | 68 ++++++++---------------------
fs/btrfs/file-item.c | 4 --
fs/btrfs/fs.c | 97 ++++++++++++++++++++++++++++++++++++------
fs/btrfs/fs.h | 23 +++++++---
fs/btrfs/inode.c | 13 +++---
fs/btrfs/scrub.c | 16 +++----
fs/btrfs/super.c | 4 --
fs/btrfs/sysfs.c | 6 +--
11 files changed, 136 insertions(+), 106 deletions(-)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 0a7e74ac870b0..cb2a1325d6c0a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -892,12 +892,10 @@ config CRYPTO_BLAKE2B
- blake2b-160
- blake2b-256
- blake2b-384
- blake2b-512
- Used by the btrfs filesystem.
-
See https://blake2.net for further information.
config CRYPTO_CMAC
tristate "CMAC (Cipher-based MAC)"
select CRYPTO_HASH
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 4438637c8900c..bf7feff2fe44d 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -2,24 +2,22 @@
config BTRFS_FS
tristate "Btrfs filesystem support"
select BLK_CGROUP_PUNT_BIO
select CRC32
- select CRYPTO
- select CRYPTO_CRC32C
- select CRYPTO_XXHASH
- select CRYPTO_SHA256
- select CRYPTO_BLAKE2B
+ select CRYPTO_LIB_BLAKE2B
+ select CRYPTO_LIB_SHA256
select ZLIB_INFLATE
select ZLIB_DEFLATE
select LZO_COMPRESS
select LZO_DECOMPRESS
select ZSTD_COMPRESS
select ZSTD_DECOMPRESS
select FS_IOMAP
select RAID6_PQ
select XOR_BLOCKS
+ select XXHASH
depends on PAGE_SIZE_LESS_THAN_256KB
help
Btrfs is a general purpose copy-on-write filesystem with extents,
writable snapshotting, support for multiple devices and many more
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index bacad18357b33..12c41a3ce705f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -19,11 +19,10 @@
#include <linux/psi.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/log2.h>
#include <linux/shrinker.h>
-#include <crypto/hash.h>
#include "misc.h"
#include "ctree.h"
#include "fs.h"
#include "btrfs_inode.h"
#include "bio.h"
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0aa7e5d1b05f6..fc02e5483071e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -16,11 +16,10 @@
#include <linux/semaphore.h>
#include <linux/error-injection.h>
#include <linux/crc32c.h>
#include <linux/sched/mm.h>
#include <linux/unaligned.h>
-#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "bio.h"
@@ -59,30 +58,23 @@
BTRFS_SUPER_FLAG_METADUMP_V2)
static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
-static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
-{
- if (fs_info->csum_shash)
- crypto_free_shash(fs_info->csum_shash);
-}
-
/*
* Compute the csum of a btree block and store the result to provided buffer.
*/
static void csum_tree_block(struct extent_buffer *buf, u8 *result)
{
struct btrfs_fs_info *fs_info = buf->fs_info;
int num_pages;
u32 first_page_part;
- SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+ struct btrfs_csum_ctx csum;
char *kaddr;
int i;
- shash->tfm = fs_info->csum_shash;
- crypto_shash_init(shash);
+ btrfs_csum_init(&csum, fs_info->csum_type);
if (buf->addr) {
/* Pages are contiguous, handle them as a big one. */
kaddr = buf->addr;
first_page_part = fs_info->nodesize;
@@ -91,25 +83,25 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
kaddr = folio_address(buf->folios[0]);
first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
num_pages = num_extent_pages(buf);
}
- crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
- first_page_part - BTRFS_CSUM_SIZE);
+ btrfs_csum_update(&csum, kaddr + BTRFS_CSUM_SIZE,
+ first_page_part - BTRFS_CSUM_SIZE);
/*
* Multiple single-page folios case would reach here.
*
* nodesize <= PAGE_SIZE and large folio all handled by above
- * crypto_shash_update() already.
+ * btrfs_csum_update() already.
*/
for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) {
kaddr = folio_address(buf->folios[i]);
- crypto_shash_update(shash, kaddr, PAGE_SIZE);
+ btrfs_csum_update(&csum, kaddr, PAGE_SIZE);
}
memset(result, 0, BTRFS_CSUM_SIZE);
- crypto_shash_final(shash, result);
+ btrfs_csum_final(&csum, result);
}
/*
* we can't consider a given block up to date unless the transid of the
* block matches the transid in the parent node's pointer. This is how we
@@ -157,22 +149,19 @@ static bool btrfs_supported_super_csum(u16 csum_type)
* algorithm. Pass the raw disk superblock data.
*/
int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
const struct btrfs_super_block *disk_sb)
{
- char result[BTRFS_CSUM_SIZE];
- SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
-
- shash->tfm = fs_info->csum_shash;
+ u8 result[BTRFS_CSUM_SIZE];
/*
* The super_block structure does not span the whole
* BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
* filled with zeros and is included in the checksum.
*/
- crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE,
- BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
+ btrfs_csum(fs_info->csum_type, (const u8 *)disk_sb + BTRFS_CSUM_SIZE,
+ BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
if (memcmp(disk_sb->csum, result, fs_info->csum_size))
return 1;
return 0;
@@ -1254,11 +1243,10 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
percpu_counter_destroy(&fs_info->ordered_bytes);
if (percpu_counter_initialized(em_counter))
ASSERT(percpu_counter_sum_positive(em_counter) == 0);
percpu_counter_destroy(em_counter);
percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
- btrfs_free_csum_hash(fs_info);
btrfs_free_stripe_hash_table(fs_info);
btrfs_free_ref_cache(fs_info);
kfree(fs_info->balance_ctl);
kfree(fs_info->delayed_root);
free_global_roots(fs_info);
@@ -2013,25 +2001,12 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
}
return 0;
}
-static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
+static void btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
{
- struct crypto_shash *csum_shash;
- const char *csum_driver = btrfs_super_csum_driver(csum_type);
-
- csum_shash = crypto_alloc_shash(csum_driver, 0, 0);
-
- if (IS_ERR(csum_shash)) {
- btrfs_err(fs_info, "error allocating %s hash for checksum",
- csum_driver);
- return PTR_ERR(csum_shash);
- }
-
- fs_info->csum_shash = csum_shash;
-
/* Check if the checksum implementation is a fast accelerated one. */
switch (csum_type) {
case BTRFS_CSUM_TYPE_CRC32:
if (crc32_optimizations() & CRC32C_OPTIMIZATION)
set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
@@ -2041,14 +2016,12 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
break;
default:
break;
}
- btrfs_info(fs_info, "using %s (%s) checksum algorithm",
- btrfs_super_csum_name(csum_type),
- crypto_shash_driver_name(csum_shash));
- return 0;
+ btrfs_info(fs_info, "using %s checksum algorithm",
+ btrfs_super_csum_name(csum_type));
}
static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices)
{
@@ -3328,16 +3301,13 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
btrfs_release_disk_super(disk_super);
goto fail_alloc;
}
fs_info->csum_size = btrfs_super_csum_size(disk_super);
+ fs_info->csum_type = csum_type;
- ret = btrfs_init_csum_hash(fs_info, csum_type);
- if (ret) {
- btrfs_release_disk_super(disk_super);
- goto fail_alloc;
- }
+ btrfs_init_csum_hash(fs_info, csum_type);
/*
* We want to check superblock checksum, the type is stored inside.
* Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
*/
@@ -3727,22 +3697,19 @@ static void btrfs_end_super_write(struct bio *bio)
static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
{
struct btrfs_fs_info *fs_info = device->fs_info;
struct address_space *mapping = device->bdev->bd_mapping;
- SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int ret;
u64 bytenr, bytenr_orig;
atomic_set(&device->sb_write_errors, 0);
if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
- shash->tfm = fs_info->csum_shash;
-
for (i = 0; i < max_mirrors; i++) {
struct folio *folio;
struct bio *bio;
struct btrfs_super_block *disk_super;
size_t offset;
@@ -3762,13 +3729,12 @@ static int write_dev_supers(struct btrfs_device *device,
device->commit_total_bytes)
break;
btrfs_set_super_bytenr(sb, bytenr_orig);
- crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
- BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
- sb->csum);
+ btrfs_csum(fs_info->csum_type, (const u8 *)sb + BTRFS_CSUM_SIZE,
+ BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, sb->csum);
folio = __filemap_get_folio(mapping, bytenr >> PAGE_SHIFT,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
GFP_NOFS);
if (IS_ERR(folio)) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a42e6d54e7cd7..b886306721b3b 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -6,11 +6,10 @@
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/sched/mm.h>
-#include <crypto/hash.h>
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "bio.h"
@@ -770,11 +769,10 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
int btrfs_csum_one_bio(struct btrfs_bio *bbio)
{
struct btrfs_ordered_extent *ordered = bbio->ordered;
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct bio *bio = &bbio->bio;
struct btrfs_ordered_sum *sums;
struct bvec_iter iter = bio->bi_iter;
phys_addr_t paddr;
const u32 blocksize = fs_info->sectorsize;
@@ -793,12 +791,10 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio)
INIT_LIST_HEAD(&sums->list);
sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
index = 0;
- shash->tfm = fs_info->csum_shash;
-
btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) {
btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index);
index += fs_info->csum_size;
}
diff --git a/fs/btrfs/fs.c b/fs/btrfs/fs.c
index feb0a2faa8379..211ed50e96f33 100644
--- a/fs/btrfs/fs.c
+++ b/fs/btrfs/fs.c
@@ -1,22 +1,21 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/crc32.h>
#include "messages.h"
#include "fs.h"
#include "accessors.h"
#include "volumes.h"
static const struct btrfs_csums {
u16 size;
const char name[10];
- const char driver[12];
} btrfs_csums[] = {
[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
- [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
- .driver = "blake2b-256" },
+ [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b" },
};
/* This exists for btrfs-progs usages. */
u16 btrfs_csum_type_size(u16 type)
{
@@ -35,25 +34,95 @@ const char *btrfs_super_csum_name(u16 csum_type)
{
/* csum type is validated at mount time. */
return btrfs_csums[csum_type].name;
}
-/*
- * Return driver name if defined, otherwise the name that's also a valid driver
- * name.
- */
-const char *btrfs_super_csum_driver(u16 csum_type)
+size_t __attribute_const__ btrfs_get_num_csums(void)
{
- /* csum type is validated at mount time */
- return btrfs_csums[csum_type].driver[0] ?
- btrfs_csums[csum_type].driver :
- btrfs_csums[csum_type].name;
+ return ARRAY_SIZE(btrfs_csums);
}
-size_t __attribute_const__ btrfs_get_num_csums(void)
+void btrfs_csum(u16 csum_type, const u8 *data, size_t len, u8 *out)
{
- return ARRAY_SIZE(btrfs_csums);
+ switch (csum_type) {
+ case BTRFS_CSUM_TYPE_CRC32:
+ put_unaligned_le32(~crc32c(~0, data, len), out);
+ break;
+ case BTRFS_CSUM_TYPE_XXHASH:
+ put_unaligned_le64(xxh64(data, len, 0), out);
+ break;
+ case BTRFS_CSUM_TYPE_SHA256:
+ sha256(data, len, out);
+ break;
+ case BTRFS_CSUM_TYPE_BLAKE2:
+ blake2b(NULL, 0, data, len, out, 32);
+ break;
+ default:
+ BUG(); /* csum type is validated at mount time. */
+ }
+}
+
+void btrfs_csum_init(struct btrfs_csum_ctx *ctx, u16 csum_type)
+{
+ ctx->csum_type = csum_type;
+ switch (ctx->csum_type) {
+ case BTRFS_CSUM_TYPE_CRC32:
+ ctx->crc32 = ~0;
+ break;
+ case BTRFS_CSUM_TYPE_XXHASH:
+ xxh64_reset(&ctx->xxh64, 0);
+ break;
+ case BTRFS_CSUM_TYPE_SHA256:
+ sha256_init(&ctx->sha256);
+ break;
+ case BTRFS_CSUM_TYPE_BLAKE2:
+ blake2b_init(&ctx->blake2b, 32);
+ break;
+ default:
+ BUG(); /* csum type is validated at mount time. */
+ }
+}
+
+void btrfs_csum_update(struct btrfs_csum_ctx *ctx, const u8 *data, size_t len)
+{
+ switch (ctx->csum_type) {
+ case BTRFS_CSUM_TYPE_CRC32:
+ ctx->crc32 = crc32c(ctx->crc32, data, len);
+ break;
+ case BTRFS_CSUM_TYPE_XXHASH:
+ xxh64_update(&ctx->xxh64, data, len);
+ break;
+ case BTRFS_CSUM_TYPE_SHA256:
+ sha256_update(&ctx->sha256, data, len);
+ break;
+ case BTRFS_CSUM_TYPE_BLAKE2:
+ blake2b_update(&ctx->blake2b, data, len);
+ break;
+ default:
+ BUG(); /* csum type is validated at mount time. */
+
+ }
+}
+
+void btrfs_csum_final(struct btrfs_csum_ctx *ctx, u8 *out)
+{
+ switch (ctx->csum_type) {
+ case BTRFS_CSUM_TYPE_CRC32:
+ put_unaligned_le32(~ctx->crc32, out);
+ break;
+ case BTRFS_CSUM_TYPE_XXHASH:
+ put_unaligned_le64(xxh64_digest(&ctx->xxh64), out);
+ break;
+ case BTRFS_CSUM_TYPE_SHA256:
+ sha256_final(&ctx->sha256, out);
+ break;
+ case BTRFS_CSUM_TYPE_BLAKE2:
+ blake2b_final(&ctx->blake2b, out);
+ break;
+ default:
+ BUG(); /* csum type is validated at mount time. */
+ }
}
/*
* We support the following block sizes for all systems:
*
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 814bbc9417d2a..3fd9cb25379aa 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -1,10 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BTRFS_FS_H
#define BTRFS_FS_H
+#include <crypto/blake2b.h>
+#include <crypto/sha2.h>
#include <linux/blkdev.h>
#include <linux/sizes.h>
#include <linux/time64.h>
#include <linux/compiler.h>
#include <linux/math.h>
@@ -22,21 +24,21 @@
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/wait_bit.h>
#include <linux/sched.h>
#include <linux/rbtree.h>
+#include <linux/xxhash.h>
#include <uapi/linux/btrfs.h>
#include <uapi/linux/btrfs_tree.h>
#include "extent-io-tree.h"
#include "async-thread.h"
#include "block-rsv.h"
struct inode;
struct super_block;
struct kobject;
struct reloc_control;
-struct crypto_shash;
struct ulist;
struct btrfs_device;
struct btrfs_block_group;
struct btrfs_root;
struct btrfs_fs_devices;
@@ -827,13 +829,14 @@ struct btrfs_fs_info {
u32 sectorsize;
/* ilog2 of sectorsize, use to avoid 64bit division */
u32 sectorsize_bits;
u32 block_min_order;
u32 block_max_order;
+ u32 stripesize;
u32 csum_size;
u32 csums_per_leaf;
- u32 stripesize;
+ u16 csum_type;
/*
* Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
* filesystem, on zoned it depends on the device constraints.
*/
@@ -841,12 +844,10 @@ struct btrfs_fs_info {
/* Block groups and devices containing active swapfiles. */
spinlock_t swapfile_pins_lock;
struct rb_root swapfile_pins;
- struct crypto_shash *csum_shash;
-
/* Type of exclusive operation running, protected by super_lock */
enum btrfs_exclusive_operation exclusive_operation;
/*
* Zone size > 0 when in ZONED mode, otherwise it's used for a check
@@ -1034,12 +1035,24 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args);
u16 btrfs_csum_type_size(u16 type);
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
-const char *btrfs_super_csum_driver(u16 csum_type);
size_t __attribute_const__ btrfs_get_num_csums(void);
+struct btrfs_csum_ctx {
+ u16 csum_type;
+ union {
+ u32 crc32;
+ struct xxh64_state xxh64;
+ struct sha256_ctx sha256;
+ struct blake2b_ctx blake2b;
+ };
+};
+void btrfs_csum(u16 csum_type, const u8 *data, size_t len, u8 *out);
+void btrfs_csum_init(struct btrfs_csum_ctx *ctx, u16 csum_type);
+void btrfs_csum_update(struct btrfs_csum_ctx *ctx, const u8 *data, size_t len);
+void btrfs_csum_final(struct btrfs_csum_ctx *ctx, u8 *out);
static inline bool btrfs_is_empty_uuid(const u8 *uuid)
{
return uuid_is_null((const uuid_t *)uuid);
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3b1b3a0553eea..f2aee871d660a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1,11 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*/
-#include <crypto/hash.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/blk-cgroup.h>
#include <linux/file.h>
#include <linux/fs.h>
@@ -3333,33 +3332,33 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr,
u8 *dest)
{
struct folio *folio = page_folio(phys_to_page(paddr));
const u32 blocksize = fs_info->sectorsize;
- SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+ struct btrfs_csum_ctx csum;
- shash->tfm = fs_info->csum_shash;
/* The full block must be inside the folio. */
ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio));
if (folio_test_partial_kmap(folio)) {
size_t cur = paddr;
- crypto_shash_init(shash);
+ btrfs_csum_init(&csum, fs_info->csum_type);
while (cur < paddr + blocksize) {
void *kaddr;
size_t len = min(paddr + blocksize - cur,
PAGE_SIZE - offset_in_page(cur));
kaddr = kmap_local_folio(folio, offset_in_folio(folio, cur));
- crypto_shash_update(shash, kaddr, len);
+ btrfs_csum_update(&csum, kaddr, len);
kunmap_local(kaddr);
cur += len;
}
- crypto_shash_final(shash, dest);
+ btrfs_csum_final(&csum, dest);
} else {
- crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, dest);
+ btrfs_csum(fs_info->csum_type, phys_to_virt(paddr), blocksize,
+ dest);
}
}
/*
* Verify the checksum for a single sector without any extra action that depend
* on the type of I/O.
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 4691d0bdb2e86..38b2ee1c455aa 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -4,11 +4,10 @@
*/
#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
-#include <crypto/hash.h>
#include "ctree.h"
#include "discard.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
@@ -718,11 +717,11 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits;
const u64 logical = stripe->logical + (sector_nr << fs_info->sectorsize_bits);
void *first_kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
struct btrfs_header *header = first_kaddr;
- SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
+ struct btrfs_csum_ctx csum;
u8 on_disk_csum[BTRFS_CSUM_SIZE];
u8 calculated_csum[BTRFS_CSUM_SIZE];
/*
* Here we don't have a good way to attach the pages (and subpages)
@@ -760,21 +759,20 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
header->chunk_tree_uuid, fs_info->chunk_tree_uuid);
return;
}
/* Now check tree block csum. */
- shash->tfm = fs_info->csum_shash;
- crypto_shash_init(shash);
- crypto_shash_update(shash, first_kaddr + BTRFS_CSUM_SIZE,
- fs_info->sectorsize - BTRFS_CSUM_SIZE);
+ btrfs_csum_init(&csum, fs_info->csum_type);
+ btrfs_csum_update(&csum, first_kaddr + BTRFS_CSUM_SIZE,
+ fs_info->sectorsize - BTRFS_CSUM_SIZE);
for (int i = sector_nr + 1; i < sector_nr + sectors_per_tree; i++) {
- crypto_shash_update(shash, scrub_stripe_get_kaddr(stripe, i),
- fs_info->sectorsize);
+ btrfs_csum_update(&csum, scrub_stripe_get_kaddr(stripe, i),
+ fs_info->sectorsize);
}
- crypto_shash_final(shash, calculated_csum);
+ btrfs_csum_final(&csum, calculated_csum);
if (memcmp(calculated_csum, on_disk_csum, fs_info->csum_size) != 0) {
scrub_bitmap_set_meta_error(stripe, sector_nr, sectors_per_tree);
scrub_bitmap_set_error(stripe, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"scrub: tree block %llu mirror %u has bad csum, has " CSUM_FMT " want " CSUM_FMT,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d6e496436539d..e17b2f6b08a62 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2636,9 +2636,5 @@ static int __init init_btrfs_fs(void)
late_initcall(init_btrfs_fs);
module_exit(exit_btrfs_fs)
MODULE_DESCRIPTION("B-Tree File System (BTRFS)");
MODULE_LICENSE("GPL");
-MODULE_SOFTDEP("pre: crc32c");
-MODULE_SOFTDEP("pre: xxhash64");
-MODULE_SOFTDEP("pre: sha256");
-MODULE_SOFTDEP("pre: blake2b-256");
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 81f52c1f55ce5..6321c7836e4fe 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -8,11 +8,10 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/bug.h>
#include <linux/list.h>
-#include <crypto/hash.h>
#include "messages.h"
#include "ctree.h"
#include "discard.h"
#include "disk-io.h"
#include "send.h"
@@ -1250,14 +1249,13 @@ BTRFS_ATTR(, metadata_uuid, btrfs_metadata_uuid_show);
static ssize_t btrfs_checksum_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
+ const char *csum_name = btrfs_super_csum_name(csum_type);
- return sysfs_emit(buf, "%s (%s)\n",
- btrfs_super_csum_name(csum_type),
- crypto_shash_driver_name(fs_info->csum_shash));
+ return sysfs_emit(buf, "%s (%s-lib)\n", csum_name, csum_name);
}
BTRFS_ATTR(, checksum, btrfs_checksum_show);
static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
--
2.51.1.dirty
^ permalink raw reply related [flat|nested] 20+ messages in thread* Re: [PATCH 10/10] btrfs: switch to library APIs for checksums
2025-10-18 4:31 ` [PATCH 10/10] btrfs: switch to library APIs for checksums Eric Biggers
@ 2025-10-22 7:11 ` David Sterba
2025-10-22 17:59 ` Eric Biggers
0 siblings, 1 reply; 20+ messages in thread
From: David Sterba @ 2025-10-22 7:11 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-crypto, linux-kernel, linux-btrfs, linux-arm-kernel,
Ard Biesheuvel, Jason A . Donenfeld
On Fri, Oct 17, 2025 at 09:31:06PM -0700, Eric Biggers wrote:
> Make btrfs use the library APIs instead of crypto_shash, for all
> checksum computations. This has many benefits:
>
> - Allows future checksum types, e.g. XXH3 or CRC64, to be more easily
> supported. Only a library API will be needed, not crypto_shash too.
>
> - Eliminates the overhead of the generic crypto layer, including an
> indirect call for every function call and other API overhead. A
> microbenchmark of btrfs_check_read_bio() with crc32c checksums shows a
> speedup from 658 cycles to 608 cycles per 4096-byte block.
>
> - Decreases the stack usage of btrfs by reducing the size of checksum
> contexts from 384 bytes to 240 bytes, and by eliminating the need for
> some functions to declare a checksum context at all.
>
> - Increases reliability. The library functions always succeed and
> return void. In contrast, crypto_shash can fail and return errors.
> Also, the library functions are guaranteed to be available when btrfs
> is loaded; there's no longer any need to use module softdeps to try to
> work around the crypto modules sometimes not being loaded.
>
> - Fixes a bug where blake2b checksums didn't work on kernels booted with
> fips=1. Since btrfs checksums are for integrity only, it's fine for
> them to use non-FIPS-approved algorithms.
>
> Note that with having to handle 4 algorithms instead of just 1-2, this
> commit does result in a slightly positive diffstat. That being said,
> this wouldn't have been the case if btrfs had actually checked for
> errors from crypto_shash, which technically it should have been doing.
>
> Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Thanks, this simplifies quite a few things. I'd like to take it via the
btrfs tree as there may be the hash additions (XXH3, BLAKE3) but
currently I'm not sure if it won't make things more complicated. I
haven't started the kernel part yet so I can use this patchset for
development and rebase once it's merged.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 10/10] btrfs: switch to library APIs for checksums
2025-10-22 7:11 ` David Sterba
@ 2025-10-22 17:59 ` Eric Biggers
2025-10-23 18:45 ` David Sterba
0 siblings, 1 reply; 20+ messages in thread
From: Eric Biggers @ 2025-10-22 17:59 UTC (permalink / raw)
To: David Sterba
Cc: linux-crypto, linux-kernel, linux-btrfs, linux-arm-kernel,
Ard Biesheuvel, Jason A . Donenfeld
On Wed, Oct 22, 2025 at 09:11:41AM +0200, David Sterba wrote:
> On Fri, Oct 17, 2025 at 09:31:06PM -0700, Eric Biggers wrote:
> > Make btrfs use the library APIs instead of crypto_shash, for all
> > checksum computations. This has many benefits:
> >
> > - Allows future checksum types, e.g. XXH3 or CRC64, to be more easily
> > supported. Only a library API will be needed, not crypto_shash too.
> >
> > - Eliminates the overhead of the generic crypto layer, including an
> > indirect call for every function call and other API overhead. A
> > microbenchmark of btrfs_check_read_bio() with crc32c checksums shows a
> > speedup from 658 cycles to 608 cycles per 4096-byte block.
> >
> > - Decreases the stack usage of btrfs by reducing the size of checksum
> > contexts from 384 bytes to 240 bytes, and by eliminating the need for
> > some functions to declare a checksum context at all.
> >
> > - Increases reliability. The library functions always succeed and
> > return void. In contrast, crypto_shash can fail and return errors.
> > Also, the library functions are guaranteed to be available when btrfs
> > is loaded; there's no longer any need to use module softdeps to try to
> > work around the crypto modules sometimes not being loaded.
> >
> > - Fixes a bug where blake2b checksums didn't work on kernels booted with
> > fips=1. Since btrfs checksums are for integrity only, it's fine for
> > them to use non-FIPS-approved algorithms.
> >
> > Note that with having to handle 4 algorithms instead of just 1-2, this
> > commit does result in a slightly positive diffstat. That being said,
> > this wouldn't have been the case if btrfs had actually checked for
> > errors from crypto_shash, which technically it should have been doing.
> >
> > Signed-off-by: Eric Biggers <ebiggers@kernel.org>
>
> Thanks, this simplifies quite a few things. I'd like to take it via the
> btrfs tree as there may be the hash additions (XXH3, BLAKE3) but
> currently I'm not sure if it won't make things more complicated. I
> haven't started the kernel part yet so I can use this patchset for
> development and rebase once it's merged.
Great. I'm planning to take patches 1-9 through libcrypto-next for
6.19. You can then take patch 10 through the btrfs tree for 6.20. Does
that sound good? We can work out the XXH3 and BLAKE3 support later. If
you'd like to add another checksum algorithm, I'd suggest picking just
one. btrfs already supports an awful lot of choices for the checksum.
But we can discuss that later.
- Eric
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 10/10] btrfs: switch to library APIs for checksums
2025-10-22 17:59 ` Eric Biggers
@ 2025-10-23 18:45 ` David Sterba
0 siblings, 0 replies; 20+ messages in thread
From: David Sterba @ 2025-10-23 18:45 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-crypto, linux-kernel, linux-btrfs, linux-arm-kernel,
Ard Biesheuvel, Jason A . Donenfeld
On Wed, Oct 22, 2025 at 10:59:34AM -0700, Eric Biggers wrote:
> > Thanks, this simplifies quite a few things. I'd like to take it via the
> > btrfs tree as there may be the hash additions (XXH3, BLAKE3) but
> > currently I'm not sure if it won't make things more complicated. I
> > haven't started the kernel part yet so I can use this patchset for
> > development and rebase once it's merged.
>
> Great. I'm planning to take patches 1-9 through libcrypto-next for
> 6.19. You can then take patch 10 through the btrfs tree for 6.20. Does
> that sound good?
Yes, the 6.20 schedule works better for me.
> We can work out the XXH3 and BLAKE3 support later. If
> you'd like to add another checksum algorithm, I'd suggest picking just
> one. btrfs already supports an awful lot of choices for the checksum.
> But we can discuss that later.
Yes, I've deleted a long answer to that, it would be better to discuss that
separately once the xxh3 and blake3 get posted.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH 00/10] BLAKE2b library API
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
` (9 preceding siblings ...)
2025-10-18 4:31 ` [PATCH 10/10] btrfs: switch to library APIs for checksums Eric Biggers
@ 2025-10-22 10:06 ` Ard Biesheuvel
2025-10-24 19:21 ` Eric Biggers
11 siblings, 0 replies; 20+ messages in thread
From: Ard Biesheuvel @ 2025-10-22 10:06 UTC (permalink / raw)
To: Eric Biggers
Cc: linux-crypto, linux-kernel, linux-btrfs, linux-arm-kernel,
Jason A . Donenfeld
On Sat, 18 Oct 2025 at 06:36, Eric Biggers <ebiggers@kernel.org> wrote:
>
> This series can also be retrieved from:
>
> git fetch https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git blake2b-lib-v1
>
> This series adds BLAKE2b support to lib/crypto/ and reimplements the
> blake2b-* crypto_shash algorithms on top of it.
>
> To prepare for that, patches 1-4 clean up the BLAKE2s library code a
> bit, and patch 5 adds some missing 64-bit byteorder helper functions.
> Patches 6-8 add the BLAKE2b library API (closely mirroring the BLAKE2s
> one), and patch 9 makes crypto_shash use it. As usual, the library APIs
> are documented (with kerneldoc) and tested (with KUnit).
>
> With that done, all of btrfs's checksum algorithms have library APIs.
> So patch 10 converts btrfs to use the library APIs instead of shash.
> This has quite a few benefits, as detailed in that patch.
>
> Patches 1-9 are targeting libcrypto-next for 6.19. Patch 10 can go
> through the btrfs tree later.
>
> Eric Biggers (10):
> lib/crypto: blake2s: Adjust parameter order of blake2s()
> lib/crypto: blake2s: Rename blake2s_state to blake2s_ctx
> lib/crypto: blake2s: Drop excessive const & rename block => data
> lib/crypto: blake2s: Document the BLAKE2s library API
> byteorder: Add le64_to_cpu_array() and cpu_to_le64_array()
> lib/crypto: blake2b: Add BLAKE2b library functions
> lib/crypto: arm/blake2b: Migrate optimized code into library
> lib/crypto: tests: Add KUnit tests for BLAKE2b
> crypto: blake2b - Reimplement using library API
> btrfs: switch to library APIs for checksums
>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
^ permalink raw reply [flat|nested] 20+ messages in thread* Re: [PATCH 00/10] BLAKE2b library API
2025-10-18 4:30 [PATCH 00/10] BLAKE2b library API Eric Biggers
` (10 preceding siblings ...)
2025-10-22 10:06 ` [PATCH 00/10] BLAKE2b library API Ard Biesheuvel
@ 2025-10-24 19:21 ` Eric Biggers
11 siblings, 0 replies; 20+ messages in thread
From: Eric Biggers @ 2025-10-24 19:21 UTC (permalink / raw)
To: linux-crypto
Cc: linux-kernel, linux-btrfs, linux-arm-kernel, Ard Biesheuvel,
Jason A . Donenfeld
On Fri, Oct 17, 2025 at 09:30:56PM -0700, Eric Biggers wrote:
> This series can also be retrieved from:
>
> git fetch https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git blake2b-lib-v1
>
> This series adds BLAKE2b support to lib/crypto/ and reimplements the
> blake2b-* crypto_shash algorithms on top of it.
>
> To prepare for that, patches 1-4 clean up the BLAKE2s library code a
> bit, and patch 5 adds some missing 64-bit byteorder helper functions.
> Patches 6-8 add the BLAKE2b library API (closely mirroring the BLAKE2s
> one), and patch 9 makes crypto_shash use it. As usual, the library APIs
> are documented (with kerneldoc) and tested (with KUnit).
>
> With that done, all of btrfs's checksum algorithms have library APIs.
> So patch 10 converts btrfs to use the library APIs instead of shash.
> This has quite a few benefits, as detailed in that patch.
>
> Patches 1-9 are targeting libcrypto-next for 6.19. Patch 10 can go
> through the btrfs tree later.
>
> Eric Biggers (10):
> lib/crypto: blake2s: Adjust parameter order of blake2s()
> lib/crypto: blake2s: Rename blake2s_state to blake2s_ctx
> lib/crypto: blake2s: Drop excessive const & rename block => data
> lib/crypto: blake2s: Document the BLAKE2s library API
> byteorder: Add le64_to_cpu_array() and cpu_to_le64_array()
> lib/crypto: blake2b: Add BLAKE2b library functions
> lib/crypto: arm/blake2b: Migrate optimized code into library
> lib/crypto: tests: Add KUnit tests for BLAKE2b
> crypto: blake2b - Reimplement using library API
> btrfs: switch to library APIs for checksums
Applied patches 1-9 (i.e., all except the btrfs patch) to
https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux.git/log/?h=libcrypto-next
I folded the following fixup into patch 7 to address
https://lore.kernel.org/r/20251019163249.GD1604@sol/ and
https://lore.kernel.org/r/202510221007.WnlC6PmP-lkp@intel.com/
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 5c9a933928188..bc26777d08e97 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -37,5 +37,5 @@ CFLAGS_blake2b.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/sho
ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2B_ARCH),y)
CFLAGS_blake2b.o += -I$(src)/$(SRCARCH)
-obj-$(CONFIG_ARM) += arm/blake2b-neon-core.o
+libblake2b-$(CONFIG_ARM) += arm/blake2b-neon-core.o
endif # CONFIG_CRYPTO_LIB_BLAKE2B_ARCH
^ permalink raw reply related [flat|nested] 20+ messages in thread