Subject: [PATCH] crypto: arm64/aes-modes - get rid of literal load of addend vector
From: Ard Biesheuvel @ 2018-08-21 16:46 UTC
  To: linux-arm-kernel

Replace the literal load of the addend vector with a sequence that
composes it using immediates. While at it, tweak the code that refers
to it so that it no longer clobbers the register, which allows the
load to be hoisted out of the loop as well.
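
For reference, the composed value can be traced instruction by
instruction (element values per step; annotations mine, not part of
the patch itself):

  movi   v7.4h, #1              /* v7.8h = { 1, 1, 1, 1, 0, 0, 0, 0 } */
  movi   v8.4h, #2              /* v8.8h = { 2, 2, 2, 2, 0, 0, 0, 0 } */
  uaddl  v6.4s, v7.4h, v8.4h    /* v6.4s = { 3, 3, 3, 3 } */
  zip1   v8.8h, v7.8h, v8.8h    /* v8.8h = { 1, 2, 1, 2, 1, 2, 1, 2 } */
  zip1   v8.4s, v8.4s, v6.4s    /* v8.8h = { 1, 2, 3, 0, 1, 2, 3, 0 } */
  zip2   v8.8h, v8.8h, v7.8h    /* v8.4s = { 1, 2, 3, 0 } */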

This results in generally better code, but it also works around an
issue with Clang: its integrated assembler does not implement the
GNU ARM asm syntax completely, and does not support the =literal
notation for FP registers.
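
The construct being removed relies on GAS emitting the constant into a
literal pool (hence the .ltorg directive that also goes away below)
and loading it PC-relative, roughly:

  ldr    q8, =0x30000000200000001    /* load { 1, 2, 3, 0 } from the pool */
  ...
  .ltorg                             /* emit the literal pool here */

Clang's integrated assembler rejects this =<immediate> form when the
destination is an FP/SIMD register.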

Cc: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/aes-modes.S | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)
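
For reviewers: with the addend vector composed once outside the loop,
each 4-block iteration derives the counters for blocks 1..3 with a
single vector add, without overwriting v8 in the process (annotated
excerpt from the second hunk; comments mine):

  dup    v7.4s, w6               /* broadcast swabbed 32-bit counter */
  add    v7.4s, v7.4s, v8.4s     /* v7.4s = { ctr+1, ctr+2, ctr+3, ctr } */
  rev32  v7.16b, v7.16b          /* back to big-endian byte order */
  mov    v1.s[3], v7.s[0]        /* block 1 counter word = ctr+1 */
  mov    v2.s[3], v7.s[1]        /* block 2 counter word = ctr+2 */
  mov    v3.s[3], v7.s[2]        /* block 3 counter word = ctr+3 */

Since rev32 now targets v7 instead of v8, the addend vector survives
across iterations, which is what allows its construction to be hoisted
out of .LctrloopNx.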

diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 483a7130cf0e..e966620ee230 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -225,6 +225,14 @@ AES_ENTRY(aes_ctr_encrypt)
 	enc_prepare	w22, x21, x6
 	ld1		{v4.16b}, [x24]
 
+	/* compose addend vector { 1, 2, 3, 0 } in v8.4s */
+	movi		v7.4h, #1
+	movi		v8.4h, #2
+	uaddl		v6.4s, v7.4h, v8.4h
+	zip1		v8.8h, v7.8h, v8.8h
+	zip1		v8.4s, v8.4s, v6.4s
+	zip2		v8.8h, v8.8h, v7.8h
+
 	umov		x6, v4.d[1]		/* keep swabbed ctr in reg */
 	rev		x6, x6
 .LctrloopNx:
@@ -232,17 +240,16 @@ AES_ENTRY(aes_ctr_encrypt)
 	bmi		.Lctr1x
 	cmn		w6, #4			/* 32 bit overflow? */
 	bcs		.Lctr1x
-	ldr		q8, =0x30000000200000001	/* addends 1,2,3[,0] */
 	dup		v7.4s, w6
 	mov		v0.16b, v4.16b
 	add		v7.4s, v7.4s, v8.4s
 	mov		v1.16b, v4.16b
-	rev32		v8.16b, v7.16b
+	rev32		v7.16b, v7.16b
 	mov		v2.16b, v4.16b
 	mov		v3.16b, v4.16b
-	mov		v1.s[3], v8.s[0]
-	mov		v2.s[3], v8.s[1]
-	mov		v3.s[3], v8.s[2]
+	mov		v1.s[3], v7.s[0]
+	mov		v2.s[3], v7.s[1]
+	mov		v3.s[3], v7.s[2]
 	ld1		{v5.16b-v7.16b}, [x20], #48	/* get 3 input blocks */
 	bl		aes_encrypt_block4x
 	eor		v0.16b, v5.16b, v0.16b
@@ -296,7 +303,6 @@ AES_ENTRY(aes_ctr_encrypt)
 	ins		v4.d[0], x7
 	b		.Lctrcarrydone
 AES_ENDPROC(aes_ctr_encrypt)
-	.ltorg
 
 
 	/*
-- 
2.17.1
