From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: stable@vger.kernel.org, Martin Willi <martin@strongswan.org>,
"Jason A . Donenfeld" <Jason@zx2c4.com>
Subject: [RFC/RFT PATCH 01/18] crypto: x86/poly1305 - fix overflow during partial reduction
Date: Sun, 31 Mar 2019 13:04:11 -0700 [thread overview]
Message-ID: <20190331200428.26597-2-ebiggers@kernel.org> (raw)
In-Reply-To: <20190331200428.26597-1-ebiggers@kernel.org>
From: Eric Biggers <ebiggers@google.com>
The x86_64 implementation of Poly1305 produces the wrong result on some
inputs because poly1305_4block_avx2() incorrectly assumes that when
partially reducing the accumulator, the bits carried from limb 'd4' to
limb 'h0' fit in a 32-bit integer. This is true for poly1305-generic
which processes only one block at a time. However, it's not true for
the AVX2 implementation, which processes 4 blocks at a time and
therefore can produce intermediate limbs about 4x larger.
Fix it by making the relevant calculations use 64-bit arithmetic rather
than 32-bit. Note that most of the carries already used 64-bit
arithmetic, but the d4 -> h0 carry was different for some reason.
To be safe I also made the same change to the corresponding SSE2 code,
though that only operates on 1 or 2 blocks at a time. I don't think
it's really needed for poly1305_block_sse2(), but it doesn't hurt
because it's already x86_64 code. It *might* be needed for
poly1305_2block_sse2(), but overflows aren't easy to reproduce there.
This bug was originally detected by my patches that improve testmgr to
fuzz algorithms against their generic implementation. But also add a
test vector which reproduces it directly (in the AVX2 case).
Fixes: b1ccc8f4b631 ("crypto: poly1305 - Add a four block AVX2 variant for x86_64")
Fixes: c70f4abef07a ("crypto: poly1305 - Add a SSE2 SIMD variant for x86_64")
Cc: <stable@vger.kernel.org> # v4.3+
Cc: Martin Willi <martin@strongswan.org>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/poly1305-avx2-x86_64.S | 14 +++++---
arch/x86/crypto/poly1305-sse2-x86_64.S | 22 ++++++++-----
crypto/testmgr.h | 44 +++++++++++++++++++++++++-
3 files changed, 67 insertions(+), 13 deletions(-)
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
index 3b6e70d085da8..8457cdd47f751 100644
--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
+++ b/arch/x86/crypto/poly1305-avx2-x86_64.S
@@ -323,6 +323,12 @@ ENTRY(poly1305_4block_avx2)
vpaddq t2,t1,t1
vmovq t1x,d4
+ # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+ # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+ # amount. Careful: we must not assume the carry bits 'd0 >> 26',
+ # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+ # integers. It's true in a single-block implementation, but not here.
+
# d1 += d0 >> 26
mov d0,%rax
shr $26,%rax
@@ -361,16 +367,16 @@ ENTRY(poly1305_4block_avx2)
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
index e6add74d78a59..6f0be7a869641 100644
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ b/arch/x86/crypto/poly1305-sse2-x86_64.S
@@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2)
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
@@ -524,6 +524,12 @@ ENTRY(poly1305_2block_sse2)
paddq t2,t1
movq t1,d4
+ # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+ # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+ # amount. Careful: we must not assume the carry bits 'd0 >> 26',
+ # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+ # integers. It's true in a single-block implementation, but not here.
+
# d1 += d0 >> 26
mov d0,%rax
shr $26,%rax
@@ -562,16 +568,16 @@ ENTRY(poly1305_2block_sse2)
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index f267633cf13ac..d18a37629f053 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -5634,7 +5634,49 @@ static const struct hash_testvec poly1305_tv_template[] = {
.psize = 80,
.digest = "\x13\x00\x00\x00\x00\x00\x00\x00"
"\x00\x00\x00\x00\x00\x00\x00\x00",
- },
+ }, { /* Regression test for overflow in AVX2 implementation */
+ .plaintext = "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff",
+ .psize = 300,
+ .digest = "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8"
+ "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1",
+ }
};
/* NHPoly1305 test vectors from https://github.com/google/adiantum */
--
2.21.0
next prev parent reply other threads:[~2019-03-31 20:06 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-03-31 20:04 [RFC/RFT PATCH 00/18] crypto: fuzz algorithms against their generic implementation Eric Biggers
2019-03-31 20:04 ` Eric Biggers [this message]
2019-04-01 7:52 ` [RFC/RFT PATCH 01/18] crypto: x86/poly1305 - fix overflow during partial reduction Martin Willi
2019-03-31 20:04 ` [RFC/RFT PATCH 02/18] crypto: crct10dif-generic - fix use via crypto_shash_digest() Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 03/18] crypto: x86/crct10dif-pcl " Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 04/18] crypto: skcipher - restore default skcipher_walk::iv on error Eric Biggers
2019-04-08 6:23 ` Herbert Xu
2019-04-08 17:27 ` Eric Biggers
2019-04-09 6:37 ` Herbert Xu
2019-03-31 20:04 ` [RFC/RFT PATCH 05/18] crypto: skcipher - don't WARN on unprocessed data after slow walk step Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 06/18] crypto: chacha20poly1305 - set cra_name correctly Eric Biggers
2019-04-01 7:57 ` Martin Willi
2019-03-31 20:04 ` [RFC/RFT PATCH 07/18] crypto: gcm - fix incompatibility between "gcm" and "gcm_base" Eric Biggers
2019-04-01 15:56 ` Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 08/18] crypto: ccm - fix incompatibility between "ccm" and "ccm_base" Eric Biggers
2019-04-02 15:48 ` Sasha Levin
2019-03-31 20:04 ` [RFC/RFT PATCH 09/18] crypto: streebog - fix unaligned memory accesses Eric Biggers
2019-03-31 21:47 ` Vitaly Chikunov
2019-04-02 16:15 ` Vitaly Chikunov
2019-04-02 16:57 ` Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 10/18] crypto: cts - don't support empty messages Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 11/18] crypto: arm64/cbcmac - handle empty messages in same way as template Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 12/18] crypto: testmgr - expand ability to test for errors Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 13/18] crypto: testmgr - identify test vectors by name rather than number Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 14/18] crypto: testmgr - add helpers for fuzzing against generic implementation Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 15/18] crypto: testmgr - fuzz hashes against their " Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 16/18] crypto: testmgr - fuzz skciphers " Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 17/18] crypto: testmgr - fuzz AEADs " Eric Biggers
2019-03-31 20:04 ` [RFC/RFT PATCH 18/18] crypto: run initcalls for generic implementations earlier Eric Biggers
2019-04-08 5:53 ` Herbert Xu
2019-04-08 17:44 ` Eric Biggers
2019-04-09 6:24 ` Herbert Xu
2019-04-09 18:16 ` Eric Biggers
2019-04-11 6:26 ` Herbert Xu
2019-04-08 6:44 ` [RFC/RFT PATCH 00/18] crypto: fuzz algorithms against their generic implementation Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190331200428.26597-2-ebiggers@kernel.org \
--to=ebiggers@kernel.org \
--cc=Jason@zx2c4.com \
--cc=linux-crypto@vger.kernel.org \
--cc=martin@strongswan.org \
--cc=stable@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.