[PATCH 4.14 37/70] crypto: x86/poly1305 - fix overflow during partial reduction

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Martin Willi <martin@strongswan.org>,
	"Jason A. Donenfeld" <Jason@zx2c4.com>,
	Eric Biggers <ebiggers@google.com>,
	Herbert Xu <herbert@gondor.apana.org.au>
Subject: [PATCH 4.14 37/70] crypto: x86/poly1305 - fix overflow during partial reduction
Date: Wed, 24 Apr 2019 19:09:57 +0200	[thread overview]
Message-ID: <20190424170912.235330391@linuxfoundation.org> (raw)
In-Reply-To: <20190424170906.751869122@linuxfoundation.org>

From: Eric Biggers <ebiggers@google.com>

commit 678cce4019d746da6c680c48ba9e6d417803e127 upstream.

The x86_64 implementation of Poly1305 produces the wrong result on some
inputs because poly1305_4block_avx2() incorrectly assumes that when
partially reducing the accumulator, the bits carried from limb 'd4' to
limb 'h0' fit in a 32-bit integer.  This is true for poly1305-generic
which processes only one block at a time.  However, it's not true for
the AVX2 implementation, which processes 4 blocks at a time and
therefore can produce intermediate limbs about 4x larger.

Fix it by making the relevant calculations use 64-bit arithmetic rather
than 32-bit.  Note that most of the carries already used 64-bit
arithmetic, but the d4 -> h0 carry was different for some reason.

To be safe I also made the same change to the corresponding SSE2 code,
though that only operates on 1 or 2 blocks at a time.  I don't think
it's really needed for poly1305_block_sse2(), but it doesn't hurt
because it's already x86_64 code.  It *might* be needed for
poly1305_2block_sse2(), but overflows aren't easy to reproduce there.

This bug was originally detected by my patches that improve testmgr to
fuzz algorithms against their generic implementation.  But also add a
test vector which reproduces it directly (in the AVX2 case).

Fixes: b1ccc8f4b631 ("crypto: poly1305 - Add a four block AVX2 variant for x86_64")
Fixes: c70f4abef07a ("crypto: poly1305 - Add a SSE2 SIMD variant for x86_64")
Cc: <stable@vger.kernel.org> # v4.3+
Cc: Martin Willi <martin@strongswan.org>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
 arch/x86/crypto/poly1305-avx2-x86_64.S |   14 +++++++---
 arch/x86/crypto/poly1305-sse2-x86_64.S |   22 ++++++++++------
 crypto/testmgr.h                       |   44 ++++++++++++++++++++++++++++++++-
 3 files changed, 67 insertions(+), 13 deletions(-)

--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
+++ b/arch/x86/crypto/poly1305-avx2-x86_64.S
@@ -323,6 +323,12 @@ ENTRY(poly1305_4block_avx2)
 	vpaddq		t2,t1,t1
 	vmovq		t1x,d4
 
+	# Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+	# h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+	# amount.  Careful: we must not assume the carry bits 'd0 >> 26',
+	# 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+	# integers.  It's true in a single-block implementation, but not here.
+
 	# d1 += d0 >> 26
 	mov		d0,%rax
 	shr		$26,%rax
@@ -361,16 +367,16 @@ ENTRY(poly1305_4block_avx2)
 	# h0 += (d4 >> 26) * 5
 	mov		d4,%rax
 	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
+	lea		(%rax,%rax,4),%rax
+	add		%rax,%rbx
 	# h4 = d4 & 0x3ffffff
 	mov		d4,%rax
 	and		$0x3ffffff,%eax
 	mov		%eax,h4
 
 	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
+	mov		%rbx,%rax
+	shr		$26,%rax
 	add		%eax,h1
 	# h0 = h0 & 0x3ffffff
 	andl		$0x3ffffff,%ebx
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ b/arch/x86/crypto/poly1305-sse2-x86_64.S
@@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2)
 	# h0 += (d4 >> 26) * 5
 	mov		d4,%rax
 	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
+	lea		(%rax,%rax,4),%rax
+	add		%rax,%rbx
 	# h4 = d4 & 0x3ffffff
 	mov		d4,%rax
 	and		$0x3ffffff,%eax
 	mov		%eax,h4
 
 	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
+	mov		%rbx,%rax
+	shr		$26,%rax
 	add		%eax,h1
 	# h0 = h0 & 0x3ffffff
 	andl		$0x3ffffff,%ebx
@@ -520,6 +520,12 @@ ENTRY(poly1305_2block_sse2)
 	paddq		t2,t1
 	movq		t1,d4
 
+	# Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+	# h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+	# amount.  Careful: we must not assume the carry bits 'd0 >> 26',
+	# 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+	# integers.  It's true in a single-block implementation, but not here.
+
 	# d1 += d0 >> 26
 	mov		d0,%rax
 	shr		$26,%rax
@@ -558,16 +564,16 @@ ENTRY(poly1305_2block_sse2)
 	# h0 += (d4 >> 26) * 5
 	mov		d4,%rax
 	shr		$26,%rax
-	lea		(%eax,%eax,4),%eax
-	add		%eax,%ebx
+	lea		(%rax,%rax,4),%rax
+	add		%rax,%rbx
 	# h4 = d4 & 0x3ffffff
 	mov		d4,%rax
 	and		$0x3ffffff,%eax
 	mov		%eax,h4
 
 	# h1 += h0 >> 26
-	mov		%ebx,%eax
-	shr		$26,%eax
+	mov		%rbx,%rax
+	shr		$26,%rax
 	add		%eax,h1
 	# h0 = h0 & 0x3ffffff
 	andl		$0x3ffffff,%ebx
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -4660,7 +4660,49 @@ static const struct hash_testvec poly130
 		.psize		= 80,
 		.digest		= "\x13\x00\x00\x00\x00\x00\x00\x00"
 				  "\x00\x00\x00\x00\x00\x00\x00\x00",
-	},
+	}, { /* Regression test for overflow in AVX2 implementation */
+		.plaintext	= "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff\xff\xff\xff\xff"
+				  "\xff\xff\xff\xff",
+		.psize		= 300,
+		.digest		= "\xfb\x5e\x96\xd8\x61\xd5\xc7\xc8"
+				  "\x78\xe5\x87\xcc\x2d\x5a\x22\xe1",
+	}
 };
 
 /*

next prev parent reply	other threads:[~2019-04-24 17:53 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-04-24 17:09 [PATCH 4.14 00/70] 4.14.114-stable review Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 01/70] bonding: fix event handling for stacked bonds Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 02/70] net: atm: Fix potential Spectre v1 vulnerabilities Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 03/70] net: bridge: fix per-port af_packet sockets Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 04/70] net: bridge: multicast: use rcu to access port list from br_multicast_start_querier Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 05/70] net: fou: do not use guehdr after iptunnel_pull_offloads in gue_udp_recv Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 06/70] tcp: tcp_grow_window() needs to respect tcp_space() Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 07/70] team: set slave to promisc if team is already in promisc mode Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 08/70] vhost: reject zero size iova range Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 09/70] ipv4: recompile ip options in ipv4_link_failure Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 10/70] ipv4: ensure rcu_read_lock() in ipv4_link_failure() Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 11/70] net: thunderx: raise XDP MTU to 1508 Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 12/70] net: thunderx: dont allow jumbo frames with XDP Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 13/70] CIFS: keep FileInfo handle live during oplock break Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 14/70] KVM: x86: Dont clear EFER during SMM transitions for 32-bit vCPU Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 15/70] KVM: x86: svm: make sure NMI is injected after nmi_singlestep Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 16/70] Staging: iio: meter: fixed typo Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 17/70] staging: iio: ad7192: Fix ad7193 channel address Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 18/70] iio: gyro: mpu3050: fix chip ID reading Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 19/70] iio/gyro/bmg160: Use millidegrees for temperature scale Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 20/70] iio: cros_ec: Fix the maths for gyro scale calculation Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 21/70] iio: ad_sigma_delta: select channel when reading register Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 22/70] iio: dac: mcp4725: add missing powerdown bits in store eeprom Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 23/70] iio: Fix scan mask selection Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 24/70] iio: adc: at91: disable adc channel interrupt in timeout case Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 25/70] iio: core: fix a possible circular locking dependency Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 26/70] io: accel: kxcjk1013: restore the range after resume Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 27/70] staging: comedi: vmk80xx: Fix use of uninitialized semaphore Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 28/70] staging: comedi: vmk80xx: Fix possible double-free of ->usb_rx_buf Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 29/70] staging: comedi: ni_usb6501: Fix use of uninitialized mutex Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 30/70] staging: comedi: ni_usb6501: Fix possible double-free of ->usb_rx_buf Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 31/70] ALSA: hda/realtek - add two more pin configuration sets to quirk table Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 32/70] ALSA: core: Fix card races between register and disconnect Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 33/70] scsi: core: set result when the command cannot be dispatched Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 34/70] Revert "scsi: fcoe: clear FC_RP_STARTED flags when receiving a LOGO" Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 35/70] Revert "svm: Fix AVIC incomplete IPI emulation" Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 36/70] coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping Greg Kroah-Hartman
2019-04-24 17:09 ` Greg Kroah-Hartman [this message]
2019-04-24 17:09 ` [PATCH 4.14 38/70] arm64: futex: Restore oldval initialization to work around buggy compilers Greg Kroah-Hartman
2019-04-24 17:09 ` [PATCH 4.14 39/70] x86/kprobes: Verify stack frame on kretprobe Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 40/70] kprobes: Mark ftrace mcount handler functions nokprobe Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 41/70] kprobes: Fix error check when reusing optimized probes Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 42/70] rt2x00: do not increment sequence number while re-transmitting Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 43/70] mac80211: do not call driver wake_tx_queue op during reconfig Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 44/70] perf/x86/amd: Add event map for AMD Family 17h Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 45/70] x86/cpu/bugs: Use __initconst for const init data Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 46/70] perf/x86: Fix incorrect PEBS_REGS Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 47/70] x86/speculation: Prevent deadlock on ssb_state::lock Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 48/70] crypto: crypto4xx - properly set IV after de- and encrypt Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 49/70] mmc: sdhci: Fix data command CRC error handling Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 50/70] mmc: sdhci: Rename SDHCI_ACMD12_ERR and SDHCI_INT_ACMD12ERR Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 51/70] mmc: sdhci: Handle auto-command errors Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 52/70] modpost: file2alias: go back to simple devtable lookup Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 53/70] modpost: file2alias: check prototype of handler Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 54/70] tpm/tpm_i2c_atmel: Return -E2BIG when the transfer is incomplete Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 55/70] ipv6: frags: fix a lockdep false positive Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 56/70] net: IP defrag: encapsulate rbtree defrag code into callable functions Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 57/70] ipv6: remove dependency of nf_defrag_ipv6 on ipv6 module Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 58/70] net: IP6 defrag: use rbtrees for IPv6 defrag Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 59/70] net: IP6 defrag: use rbtrees in nf_conntrack_reasm.c Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 60/70] Revert "kbuild: use -Oz instead of -Os when using clang" Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 61/70] sched/fair: Limit sched_cfs_period_timer() loop to avoid hard lockup Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 62/70] device_cgroup: fix RCU imbalance in error case Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 63/70] mm/vmstat.c: fix /proc/vmstat format for CONFIG_DEBUG_TLBFLUSH=y CONFIG_SMP=n Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 64/70] ALSA: info: Fix racy addition/deletion of nodes Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 65/70] percpu: stop printing kernel addresses Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 66/70] tools include: Adopt linux/bits.h Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 67/70] iomap: report collisions between directio and buffered writes to userspace Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 68/70] xfs: add the ability to join a held buffer to a defer_ops Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 69/70] xfs: hold xfs_buf locked between shortform->leaf conversion and the addition of an attribute Greg Kroah-Hartman
2019-04-24 17:10 ` [PATCH 4.14 70/70] i2c-hid: properly terminate i2c_hid_dmi_desc_override_table[] array Greg Kroah-Hartman
2019-04-24 22:05 ` [PATCH 4.14 00/70] 4.14.114-stable review kernelci.org bot
2019-04-25  5:58 ` Naresh Kamboju
2019-04-25 11:55 ` Jon Hunter
2019-04-25 16:25 ` shuah
2019-04-25 19:38 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190424170912.235330391@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=Jason@zx2c4.com \
    --cc=ebiggers@google.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=linux-kernel@vger.kernel.org \
    --cc=martin@strongswan.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox