From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
To: linux-crypto@vger.kernel.org, linux-arm-kernel@lists.infradead.org
Cc: herbert@gondor.apana.org.au, dave.martin@arm.com,
Ard Biesheuvel <ard.biesheuvel@linaro.org>
Subject: [PATCH resend 17/18] crypto: arm/aes - avoid expanded lookup tables in the final round
Date: Mon, 24 Jul 2017 11:28:19 +0100 [thread overview]
Message-ID: <20170724102820.16534-18-ard.biesheuvel@linaro.org> (raw)
In-Reply-To: <20170724102820.16534-1-ard.biesheuvel@linaro.org>
For the final round, avoid the expanded and padded lookup tables
exported by the generic AES driver. Instead, for encryption, we can
perform byte loads from the same table we used for the inner rounds,
which will still be hot in the caches. For decryption, use the inverse
AES Sbox directly, which is 4x smaller than the inverse lookup table
exported by the generic driver.
This should significantly reduce the Dcache footprint of our code,
which makes the code more robust against timing attacks. It does not
introduce any additional module dependencies, given that we already
rely on the core AES module for the shared key expansion routines.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm/crypto/aes-cipher-core.S | 88 +++++++++++++++-----
1 file changed, 65 insertions(+), 23 deletions(-)
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index c817a86c4ca8..54b384084637 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@
*/
#include <linux/linkage.h>
+#include <asm/cache.h>
.text
.align 5
@@ -32,19 +33,19 @@
.endif
.endm
- .macro __load, out, in, idx
+ .macro __load, out, in, idx, sz, op
.if __LINUX_ARM_ARCH__ < 7 && \idx > 0
- ldr \out, [ttab, \in, lsr #(8 * \idx) - 2]
+ ldr\op \out, [ttab, \in, lsr #(8 * \idx) - \sz]
.else
- ldr \out, [ttab, \in, lsl #2]
+ ldr\op \out, [ttab, \in, lsl #\sz]
.endif
.endm
- .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc
+ .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
__select \out0, \in0, 0
__select t0, \in1, 1
- __load \out0, \out0, 0
- __load t0, t0, 1
+ __load \out0, \out0, 0, \sz, \op
+ __load t0, t0, 1, \sz, \op
.if \enc
__select \out1, \in1, 0
@@ -53,10 +54,10 @@
__select \out1, \in3, 0
__select t1, \in0, 1
.endif
- __load \out1, \out1, 0
+ __load \out1, \out1, 0, \sz, \op
__select t2, \in2, 2
- __load t1, t1, 1
- __load t2, t2, 2
+ __load t1, t1, 1, \sz, \op
+ __load t2, t2, 2, \sz, \op
eor \out0, \out0, t0, ror #24
@@ -68,9 +69,9 @@
__select \t3, \in1, 2
__select \t4, \in2, 3
.endif
- __load \t3, \t3, 2
- __load t0, t0, 3
- __load \t4, \t4, 3
+ __load \t3, \t3, 2, \sz, \op
+ __load t0, t0, 3, \sz, \op
+ __load \t4, \t4, 3, \sz, \op
eor \out1, \out1, t1, ror #24
eor \out0, \out0, t2, ror #16
@@ -82,14 +83,14 @@
eor \out1, \out1, t2
.endm
- .macro fround, out0, out1, out2, out3, in0, in1, in2, in3
- __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
- __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
.endm
- .macro iround, out0, out1, out2, out3, in0, in1, in2, in3
- __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
- __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
.endm
.macro __rev, out, in
@@ -114,7 +115,7 @@
.endif
.endm
- .macro do_crypt, round, ttab, ltab
+ .macro do_crypt, round, ttab, ltab, bsz
push {r3-r11, lr}
ldr r4, [in]
@@ -146,9 +147,12 @@
1: subs rounds, rounds, #4
\round r8, r9, r10, r11, r4, r5, r6, r7
- __adrl ttab, \ltab, ls
+ bls 2f
\round r4, r5, r6, r7, r8, r9, r10, r11
- bhi 0b
+ b 0b
+
+2: __adrl ttab, \ltab
+ \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
#ifdef CONFIG_CPU_BIG_ENDIAN
__rev r4, r4
@@ -170,10 +174,48 @@
.ltorg
.endm
+ .align L1_CACHE_SHIFT
+ .type __aes_arm_inverse_sbox, %object
+__aes_arm_inverse_sbox:
+ .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+ .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+ .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+ .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+ .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+ .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+ .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+ .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+ .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+ .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+ .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+ .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+ .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+ .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+ .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+ .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+ .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+ .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+ .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+ .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+ .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+ .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+ .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+ .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+ .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+ .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+ .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+ .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+ .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+ .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+ .size __aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
+
ENTRY(__aes_arm_encrypt)
- do_crypt fround, crypto_ft_tab, crypto_fl_tab
+ do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2
ENDPROC(__aes_arm_encrypt)
+ .align 5
ENTRY(__aes_arm_decrypt)
- do_crypt iround, crypto_it_tab, crypto_il_tab
+ do_crypt iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
ENDPROC(__aes_arm_decrypt)
--
2.9.3
next prev parent reply other threads:[~2017-07-24 10:29 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-24 10:28 [PATCH resend 00/18] crypto: ARM/arm64 roundup for v4.14 Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 01/18] crypto/algapi - use separate dst and src operands for __crypto_xor() Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 02/18] crypto/algapi - make crypto_xor() take separate dst and src arguments Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 03/18] crypto: arm64/ghash-ce - add non-SIMD scalar fallback Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 04/18] crypto: arm64/crct10dif - add non-SIMD generic fallback Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 05/18] crypto: arm64/crc32 - add non-SIMD scalar fallback Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 06/18] crypto: arm64/sha1-ce - add non-SIMD generic fallback Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 07/18] crypto: arm64/sha2-ce - add non-SIMD scalar fallback Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 08/18] crypto: arm64/aes-ce-cipher - match round key endianness with generic code Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 09/18] crypto: arm64/aes-ce-cipher: add non-SIMD generic fallback Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 10/18] crypto: arm64/aes-ce-ccm: " Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 11/18] crypto: arm64/aes-blk - add a non-SIMD fallback for synchronous CTR Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 12/18] crypto: arm64/chacha20 - take may_use_simd() into account Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 13/18] crypto: arm64/aes-bs - implement non-SIMD fallback for AES-CTR Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 14/18] crypto: arm64/gcm - implement native driver using v8 Crypto Extensions Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 15/18] crypto: arm/ghash - add NEON accelerated fallback for vmull.p64 Ard Biesheuvel
2017-07-24 10:28 ` [PATCH resend 16/18] crypto: arm64/ghash - add NEON accelerated fallback for 64-bit PMULL Ard Biesheuvel
2017-07-24 10:28 ` Ard Biesheuvel [this message]
2017-07-24 10:28 ` [PATCH resend 18/18] crypto: arm64/aes - avoid expanded lookup tables in the final round Ard Biesheuvel
2017-08-02 14:46 ` [PATCH resend 00/18] crypto: ARM/arm64 roundup for v4.14 Dave Martin
2017-08-03 5:16 ` Herbert Xu
2017-08-03 6:26 ` Herbert Xu
2017-08-03 10:49 ` Dave Martin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170724102820.16534-18-ard.biesheuvel@linaro.org \
--to=ard.biesheuvel@linaro.org \
--cc=dave.martin@arm.com \
--cc=herbert@gondor.apana.org.au \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-crypto@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).