From: Dave Watson <davejwatson@fb.com>
To: Herbert Xu <herbert@gondor.apana.org.au>,
Junaid Shahid <junaids@google.com>,
Steffen Klassert <steffen.klassert@secunet.com>,
<linux-crypto@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>,
Hannes Frederic Sowa <hannes@stressinduktion.org>,
Tim Chen <tim.c.chen@linux.intel.com>,
Sabrina Dubroca <sd@queasysnail.net>,
<linux-kernel@vger.kernel.org>,
Stephan Mueller <smueller@chronox.de>,
Ilya Lesokhin <ilyal@mellanox.com>
Subject: [PATCH 11/14] x86/crypto: aesni: Introduce partial block macro
Date: Mon, 12 Feb 2018 11:50:44 -0800
Message-ID: <20180212195044.GA60984@davejwatson-mba.local>
In-Reply-To: <cover.1518211765.git.davejwatson@fb.com>
Before this diff, multiple calls to GCM_ENC_DEC would succeed, but
only if the length passed to each call was a multiple of 16 bytes.
Handle partial blocks at the start of GCM_ENC_DEC, and update
aadhash as appropriate.
The data offset %r11 is also updated after the partial block.
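For illustration only, here is a rough C sketch of the state this
macro carries between update calls. The struct and helper names are
invented for the sketch; the real state lives in gcm_context_data
(PBlockLen, PBlockEncKey and AadHash in the asm):

#include <stddef.h>
#include <stdint.h>

/* Invented names mirroring the asm's PBlockLen/PBlockEncKey/AadHash. */
struct gcm_partial_state {
        uint8_t pblock_enc_key[16]; /* E(K, Yn): keystream of the open block */
        uint8_t pblock_cipher[16];  /* ciphertext bytes gathered so far */
        size_t  pblock_len;         /* how many of the 16 bytes are filled */
        uint8_t aad_hash[16];       /* running GHASH accumulator */
};

/* Assumed helper: XOR the block into the hash and do one GHASH
 * multiply, as GHASH_MUL does in the asm. Not implemented here. */
void ghash_mul(uint8_t hash[16], const uint8_t block[16]);

/* Encrypt-side sketch: consume input until the open partial block is
 * full. The return value is the data offset the asm tracks in %r11
 * and then subtracts from the remaining length. */
static size_t partial_block_enc(struct gcm_partial_state *s,
                                uint8_t *out, const uint8_t *in,
                                size_t len)
{
        size_t used = 0;

        if (s->pblock_len == 0)
                return 0;       /* previous update ended block-aligned */

        while (s->pblock_len < 16 && used < len) {
                uint8_t c = in[used] ^ s->pblock_enc_key[s->pblock_len];

                out[used++] = c;
                s->pblock_cipher[s->pblock_len++] = c;
        }
        if (s->pblock_len == 16) {
                /* Block complete: fold into GHASH and reset, matching
                 * the GHASH_MUL + "PBlockLen = 0" path below. */
                ghash_mul(s->aad_hash, s->pblock_cipher);
                s->pblock_len = 0;
        }
        return used;
}

Decryption is symmetric, except the bytes saved for GHASH are the
incoming ciphertext rather than the bytes produced.
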
Signed-off-by: Dave Watson <davejwatson@fb.com>
---
arch/x86/crypto/aesni-intel_asm.S | 151 +++++++++++++++++++++++++++++++++++++-
1 file changed, 150 insertions(+), 1 deletion(-)
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 3ada06b..398bd2237f 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -284,7 +284,13 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
movdqu AadHash(%arg2), %xmm8
movdqu HashKey(%arg2), %xmm13
add %arg5, InLen(%arg2)
+
+ xor %r11, %r11 # initialise the data pointer offset as zero
+ PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
+
+ sub %r11, %arg5 # subtract the partial-block bytes consumed
mov %arg5, %r13 # save the number of bytes
+
and $-16, %r13 # %r13 = %r13 - (%r13 mod 16)
mov %r13, %r12
# Encrypt/Decrypt first few blocks
@@ -605,6 +611,150 @@ _get_AAD_done\@:
movdqu \TMP6, AadHash(%arg2)
.endm
+# PARTIAL_BLOCK: Handles encryption/decryption and the tag hash of partial
+# blocks between update calls.
+# Requires the input data to be at least 1 byte long, due to READ_PARTIAL_BLOCK
+# Outputs encrypted bytes, and updates hash and partial info in gcm_context_data
+# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
+.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
+ AAD_HASH operation
+ mov PBlockLen(%arg2), %r13
+ cmp $0, %r13
+ je _partial_block_done_\@ # Leave Macro if no partial blocks
+ # Read in input data without over-reading
+ cmp $16, \PLAIN_CYPH_LEN
+ jl _fewer_than_16_bytes_\@
+ movups (\PLAIN_CYPH_IN), %xmm1 # If at least 16 bytes, just fill xmm
+ jmp _data_read_\@
+
+_fewer_than_16_bytes_\@:
+ lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
+ mov \PLAIN_CYPH_LEN, %r12
+ READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1
+
+ mov PBlockLen(%arg2), %r13
+
+_data_read_\@: # Finished reading in data
+
+ movdqu PBlockEncKey(%arg2), %xmm9
+ movdqu HashKey(%arg2), %xmm13
+
+ lea SHIFT_MASK(%rip), %r12
+
+ # adjust the shuffle mask pointer to be able to shift r13 bytes
+ # (16 - r13 is the number of bytes in plaintext mod 16)
+ add %r13, %r12
+ movdqu (%r12), %xmm2 # get the appropriate shuffle mask
+ PSHUFB_XMM %xmm2, %xmm9 # shift right r13 bytes
+
+.ifc \operation, dec
+ movdqa %xmm1, %xmm3
+ pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn)
+
+ mov \PLAIN_CYPH_LEN, %r10
+ add %r13, %r10
+ # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
+ sub $16, %r10
+ # Determine if the partial block is not being filled and
+ # shift the mask accordingly
+ jge _no_extra_mask_1_\@
+ sub %r10, %r12
+_no_extra_mask_1_\@:
+
+ movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+ # get the appropriate mask to mask out bottom r13 bytes of xmm9
+ pand %xmm1, %xmm9 # mask out bottom r13 bytes of xmm9
+
+ pand %xmm1, %xmm3
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm3
+ PSHUFB_XMM %xmm2, %xmm3
+ pxor %xmm3, \AAD_HASH
+
+ cmp $0, %r10
+ jl _partial_incomplete_1_\@
+
+ # GHASH computation for the last <16 Byte block
+ GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+ xor %rax,%rax
+
+ mov %rax, PBlockLen(%arg2)
+ jmp _dec_done_\@
+_partial_incomplete_1_\@:
+ add \PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_dec_done_\@:
+ movdqu \AAD_HASH, AadHash(%arg2)
+.else
+ pxor %xmm1, %xmm9 # Plaintext XOR E(K, Yn)
+
+ mov \PLAIN_CYPH_LEN, %r10
+ add %r13, %r10
+ # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
+ sub $16, %r10
+ # Determine if the partial block is not being filled and
+ # shift the mask accordingly
+ jge _no_extra_mask_2_\@
+ sub %r10, %r12
+_no_extra_mask_2_\@:
+
+ movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+ # get the appropriate mask to mask out bottom r13 bytes of xmm9
+ pand %xmm1, %xmm9
+
+ movdqa SHUF_MASK(%rip), %xmm1
+ PSHUFB_XMM %xmm1, %xmm9
+ PSHUFB_XMM %xmm2, %xmm9
+ pxor %xmm9, \AAD_HASH
+
+ cmp $0, %r10
+ jl _partial_incomplete_2_\@
+
+ # GHASH computation for the last <16 Byte block
+ GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+ xor %rax,%rax
+
+ mov %rax, PBlockLen(%arg2)
+ jmp _encode_done_\@
+_partial_incomplete_2_\@:
+ add \PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_encode_done_\@:
+ movdqu \AAD_HASH, AadHash(%arg2)
+
+ movdqa SHUF_MASK(%rip), %xmm10
+ # shuffle xmm9 back to output as ciphertext
+ PSHUFB_XMM %xmm10, %xmm9
+ PSHUFB_XMM %xmm2, %xmm9
+.endif
+ # output encrypted bytes
+ cmp $0, %r10
+ jl _partial_fill_\@
+ mov %r13, %r12
+ mov $16, %r13
+ # Set r13 to be the number of bytes to write out
+ sub %r12, %r13
+ jmp _count_set_\@
+_partial_fill_\@:
+ mov \PLAIN_CYPH_LEN, %r13
+_count_set_\@:
+ movdqa %xmm9, %xmm0
+ MOVQ_R64_XMM %xmm0, %rax
+ cmp $8, %r13
+ jle _less_than_8_bytes_left_\@
+
+ mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+ add $8, \DATA_OFFSET
+ psrldq $8, %xmm0
+ MOVQ_R64_XMM %xmm0, %rax
+ sub $8, %r13
+_less_than_8_bytes_left_\@:
+ movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+ add $1, \DATA_OFFSET
+ shr $8, %rax
+ sub $1, %r13
+ jne _less_than_8_bytes_left_\@
+_partial_block_done_\@:
+.endm # PARTIAL_BLOCK
+
/*
* if a = number of total plaintext bytes
* b = floor(a/16)
@@ -623,7 +773,6 @@ _get_AAD_done\@:
movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0
- xor %r11, %r11 # initialise the data pointer offset as zero
# start AES for num_initial_blocks blocks
movdqu CurCount(%arg2), \XMM0 # XMM0 = Y0
--
2.9.5
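
Aside for reviewers: the SHIFT_MASK/ALL_F table lookups in
PARTIAL_BLOCK implement a branch-free byte shift and byte mask of the
saved E(K, Yn) block. Below is a scalar C sketch of the same effect,
purely illustrative; the asm does this with pshufb/pand on 16-byte
vectors, and the function name here is made up:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Shift the saved keystream block right by "consumed" bytes (already
 * used by an earlier update call) and zero everything past the
 * "valid" bytes available in this call. Scalar stand-in for the
 * pshufb-with-SHIFT_MASK and pand-with-ALL_F steps. */
static void shift_and_mask(uint8_t block[16], size_t consumed,
                           size_t valid)
{
        uint8_t tmp[16] = { 0 };
        size_t remaining = 16 - consumed;

        memcpy(tmp, block + consumed, remaining);
        if (valid < remaining)
                memset(tmp + valid, 0, remaining - valid);
        memcpy(block, tmp, sizeof(tmp));
}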
Thread overview: 19+ messages
[not found] <cover.1518211765.git.davejwatson@fb.com>
2018-02-12 19:47 ` [PATCH 01/14] x86/crypto: aesni: Merge INITIAL_BLOCKS_ENC/DEC Dave Watson
2018-02-12 19:48 ` [PATCH 02/14] x86/crypto: aesni: Macro-ify func save/restore Dave Watson
2018-02-12 19:48 ` [PATCH 03/14] x86/crypto: aesni: Add GCM_INIT macro Dave Watson
2018-02-12 19:48 ` [PATCH 04/14] x86/crypto: aesni: Add GCM_COMPLETE macro Dave Watson
2018-02-12 19:49 ` [PATCH 05/14] x86/crypto: aesni: Merge encode and decode to GCM_ENC_DEC macro Dave Watson
2018-02-12 19:49 ` [PATCH 06/14] x86/crypto: aesni: Introduce gcm_context_data Dave Watson
2018-02-12 19:49 ` [PATCH 07/14] x86/crypto: aesni: Split AAD hash calculation to separate macro Dave Watson
2018-02-12 19:49 ` [PATCH 08/14] x86/crypto: aesni: Fill in new context data structures Dave Watson
2018-02-12 19:50 ` [PATCH 09/14] x86/crypto: aesni: Move ghash_mul to GCM_COMPLETE Dave Watson
2018-02-12 19:50 ` [PATCH 10/14] x86/crypto: aesni: Move HashKey computation from stack to gcm_context Dave Watson
2018-02-12 19:50 ` [PATCH 11/14] x86/crypto: aesni: Introduce partial block macro Dave Watson [this message]
2018-02-12 19:50 ` [PATCH 12/14] x86/crypto: aesni: Add fast path for > 16 byte update Dave Watson
2018-02-12 19:51 ` [PATCH 13/14] x86/crypto: aesni: Introduce scatter/gather asm function stubs Dave Watson
2018-02-12 19:51 ` [PATCH 14/14] x86/crypto: aesni: Update aesni-intel_glue to use scatter/gather Dave Watson
2018-02-12 23:12 ` Junaid Shahid
2018-02-13 18:22 ` Dave Watson
2018-02-13 19:49 ` Junaid Shahid
2018-02-13 7:42 ` Stephan Mueller
2018-02-13 18:42 ` Dave Watson