* Re: [PATCH 0/3] Fix crypto/vmx/p8_ghash memory corruption
From: Herbert Xu @ 2016-10-10 2:16 UTC (permalink / raw)
To: Marcelo Cerri; +Cc: linux-crypto
In-Reply-To: <20161003150725.GF10133@gallifrey>
On Mon, Oct 03, 2016 at 12:07:25PM -0300, Marcelo Cerri wrote:
> Hi Herbert,
>
> Sorry for bothering you. I noticed you included two of the patches in
> the crypto-2.6 repository and the remaining one in cryptodev-2.6. Is
> that right? I thought all 3 patches would be included in the cryptodev
> repository.
I wanted the first two to go to stable as well so that's why
I split them up.
Cheers,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* [PATCH 6/6] crypto: arm64/aes-neon - fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
To: linux-crypto, linux-arm-kernel, herbert
Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>
The AES implementation using pure NEON instructions relies on the generic
AES key schedule generation routines, which store the round keys as arrays
of 32-bit quantities stored in memory using native endianness. This means
we should refer to these round keys using 4x4 loads rather than 16x1 loads.
In addition, the ShiftRows tables are loaded using a single scalar load,
which is also affected by endianness, so emit these tables in the correct
order depending on whether we are building for big endian or not.
Fixes: 49788fe2a128 ("arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/aes-neon.S | 25 ++++++++++++--------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index b93170e1cc93..85f07ead7c5c 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#define AES_ENTRY(func) ENTRY(neon_ ## func)
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
@@ -83,13 +84,13 @@
.endm
.macro do_block, enc, in, rounds, rk, rkp, i
- ld1 {v15.16b}, [\rk]
+ ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
sub_bytes \in
- ld1 {v15.16b}, [\rkp], #16
+ ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
@@ -229,7 +230,7 @@
.endm
.macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i
- ld1 {v15.16b}, [\rk]
+ ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
@@ -237,7 +238,7 @@
sub_bytes_2x \in0, \in1
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
- ld1 {v15.16b}, [\rkp], #16
+ ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
@@ -254,7 +255,7 @@
.endm
.macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
- ld1 {v15.16b}, [\rk]
+ ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
@@ -266,7 +267,7 @@
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
- ld1 {v15.16b}, [\rkp], #16
+ ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
@@ -306,12 +307,16 @@
.text
.align 4
.LForward_ShiftRows:
- .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
- .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 )
+CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb )
+CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 )
+CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 )
.LReverse_ShiftRows:
- .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
- .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb )
+CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 )
+CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 )
+CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 )
.LForward_Sbox:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
--
2.7.4
^ permalink raw reply related
* [PATCH 5/6] crypto: arm64/aes-ccm-ce: fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
To: linux-crypto, linux-arm-kernel, herbert
Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>
The AES-CCM implementation that uses ARMv8 Crypto Extensions instructions
refers to the AES round keys as pairs of 64-bit quantities, which causes
failures when building the code for big endian. In addition, it byte swaps
the input counter unconditionally, while this is only required for little
endian builds. So fix both issues.
Fixes: 12ac3efe74f8 ("arm64/crypto: use crypto instructions to generate AES key schedule")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/aes-ce-ccm-core.S | 53 ++++++++++----------
1 file changed, 27 insertions(+), 26 deletions(-)
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index a2a7fbcacc14..3363560c79b7 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
.text
.arch armv8-a+crypto
@@ -19,7 +20,7 @@
*/
ENTRY(ce_aes_ccm_auth_data)
ldr w8, [x3] /* leftover from prev round? */
- ld1 {v0.2d}, [x0] /* load mac */
+ ld1 {v0.16b}, [x0] /* load mac */
cbz w8, 1f
sub w8, w8, #16
eor v1.16b, v1.16b, v1.16b
@@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
beq 8f /* out of input? */
cbnz w8, 0b
eor v0.16b, v0.16b, v1.16b
-1: ld1 {v3.2d}, [x4] /* load first round key */
+1: ld1 {v3.16b}, [x4] /* load first round key */
prfm pldl1strm, [x1]
cmp w5, #12 /* which key size? */
add x6, x4, #16
@@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
mov v5.16b, v3.16b
b 4f
2: mov v4.16b, v3.16b
- ld1 {v5.2d}, [x6], #16 /* load 2nd round key */
+ ld1 {v5.16b}, [x6], #16 /* load 2nd round key */
3: aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
-4: ld1 {v3.2d}, [x6], #16 /* load next round key */
+4: ld1 {v3.16b}, [x6], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
-5: ld1 {v4.2d}, [x6], #16 /* load next round key */
+5: ld1 {v4.16b}, [x6], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
- ld1 {v5.2d}, [x6], #16 /* load next round key */
+ ld1 {v5.16b}, [x6], #16 /* load next round key */
bpl 3b
aese v0.16b, v4.16b
subs w2, w2, #16 /* last data? */
@@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data)
ld1 {v1.16b}, [x1], #16 /* load next input block */
eor v0.16b, v0.16b, v1.16b /* xor with mac */
bne 1b
-6: st1 {v0.2d}, [x0] /* store mac */
+6: st1 {v0.16b}, [x0] /* store mac */
beq 10f
adds w2, w2, #16
beq 10f
@@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data)
adds w7, w7, #1
bne 9b
eor v0.16b, v0.16b, v1.16b
- st1 {v0.2d}, [x0]
+ st1 {v0.16b}, [x0]
10: str w8, [x3]
ret
ENDPROC(ce_aes_ccm_auth_data)
@@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data)
* u32 rounds);
*/
ENTRY(ce_aes_ccm_final)
- ld1 {v3.2d}, [x2], #16 /* load first round key */
- ld1 {v0.2d}, [x0] /* load mac */
+ ld1 {v3.16b}, [x2], #16 /* load first round key */
+ ld1 {v0.16b}, [x0] /* load mac */
cmp w3, #12 /* which key size? */
sub w3, w3, #2 /* modified # of rounds */
- ld1 {v1.2d}, [x1] /* load 1st ctriv */
+ ld1 {v1.16b}, [x1] /* load 1st ctriv */
bmi 0f
bne 3f
mov v5.16b, v3.16b
b 2f
0: mov v4.16b, v3.16b
-1: ld1 {v5.2d}, [x2], #16 /* load next round key */
+1: ld1 {v5.16b}, [x2], #16 /* load next round key */
aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
aese v1.16b, v4.16b
aesmc v1.16b, v1.16b
-2: ld1 {v3.2d}, [x2], #16 /* load next round key */
+2: ld1 {v3.16b}, [x2], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
aese v1.16b, v5.16b
aesmc v1.16b, v1.16b
-3: ld1 {v4.2d}, [x2], #16 /* load next round key */
+3: ld1 {v4.16b}, [x2], #16 /* load next round key */
subs w3, w3, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
@@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final)
aese v1.16b, v4.16b
/* final round key cancels out */
eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
- st1 {v0.2d}, [x0] /* store result */
+ st1 {v0.16b}, [x0] /* store result */
ret
ENDPROC(ce_aes_ccm_final)
.macro aes_ccm_do_crypt,enc
ldr x8, [x6, #8] /* load lower ctr */
- ld1 {v0.2d}, [x5] /* load mac */
- rev x8, x8 /* keep swabbed ctr in reg */
+ ld1 {v0.16b}, [x5] /* load mac */
+CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
0: /* outer loop */
- ld1 {v1.1d}, [x6] /* load upper ctr */
+ ld1 {v1.8b}, [x6] /* load upper ctr */
prfm pldl1strm, [x1]
add x8, x8, #1
rev x9, x8
cmp w4, #12 /* which key size? */
sub w7, w4, #2 /* get modified # of rounds */
ins v1.d[1], x9 /* no carry in lower ctr */
- ld1 {v3.2d}, [x3] /* load first round key */
+ ld1 {v3.16b}, [x3] /* load first round key */
add x10, x3, #16
bmi 1f
bne 4f
mov v5.16b, v3.16b
b 3f
1: mov v4.16b, v3.16b
- ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
+ ld1 {v5.16b}, [x10], #16 /* load 2nd round key */
2: /* inner loop: 3 rounds, 2x interleaved */
aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
aese v1.16b, v4.16b
aesmc v1.16b, v1.16b
-3: ld1 {v3.2d}, [x10], #16 /* load next round key */
+3: ld1 {v3.16b}, [x10], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
aese v1.16b, v5.16b
aesmc v1.16b, v1.16b
-4: ld1 {v4.2d}, [x10], #16 /* load next round key */
+4: ld1 {v4.16b}, [x10], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
aese v1.16b, v3.16b
aesmc v1.16b, v1.16b
- ld1 {v5.2d}, [x10], #16 /* load next round key */
+ ld1 {v5.16b}, [x10], #16 /* load next round key */
bpl 2b
aese v0.16b, v4.16b
aese v1.16b, v4.16b
@@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final)
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
st1 {v1.16b}, [x0], #16 /* write output block */
bne 0b
- rev x8, x8
- st1 {v0.2d}, [x5] /* store mac */
+CPU_LE( rev x8, x8 )
+ st1 {v0.16b}, [x5] /* store mac */
str x8, [x6, #8] /* store lsb end of ctr (BE) */
5: ret
6: eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
- st1 {v0.2d}, [x5] /* store mac */
+ st1 {v0.16b}, [x5] /* store mac */
add w2, w2, #16 /* process partial tail block */
7: ldrb w9, [x1], #1 /* get 1 byte of input */
umov w6, v1.b[0] /* get top crypted ctr byte */
--
2.7.4
^ permalink raw reply related
* [PATCH 4/6] crypto: arm64/sha2-ce - fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
To: linux-crypto, linux-arm-kernel, herbert
Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>
The SHA256 digest is an array of 8 32-bit quantities, so we should refer
to them as such in order for this code to work correctly when built for
big endian. So replace 16 byte scalar loads and stores with 4x32 vector
ones where appropriate.
Fixes: 6ba6c74dfc6b ("arm64/crypto: SHA-224/SHA-256 using ARMv8 Crypto Extensions")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/sha2-ce-core.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 5df9d9d470ad..01cfee066837 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -85,7 +85,7 @@ ENTRY(sha2_ce_transform)
ld1 {v12.4s-v15.4s}, [x8]
/* load state */
- ldp dga, dgb, [x0]
+ ld1 {dgav.4s, dgbv.4s}, [x0]
/* load sha256_ce_state::finalize */
ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
@@ -148,6 +148,6 @@ CPU_LE( rev32 v19.16b, v19.16b )
b 1b
/* store new state */
-3: stp dga, dgb, [x0]
+3: st1 {dgav.4s, dgbv.4s}, [x0]
ret
ENDPROC(sha2_ce_transform)
--
2.7.4
^ permalink raw reply related
* [PATCH 3/6] crypto: arm64/sha1-ce - fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
To: linux-crypto, linux-arm-kernel, herbert
Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>
The SHA1 digest is an array of 5 32-bit quantities, so we should refer
to them as such in order for this code to work correctly when built for
big endian. So replace 16 byte scalar loads and stores with 4x4 vector
ones where appropriate.
Fixes: 2c98833a42cd ("arm64/crypto: SHA-1 using ARMv8 Crypto Extensions")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/sha1-ce-core.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 033aae6d732a..c98e7e849f06 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -78,7 +78,7 @@ ENTRY(sha1_ce_transform)
ld1r {k3.4s}, [x6]
/* load state */
- ldr dga, [x0]
+ ld1 {dgav.4s}, [x0]
ldr dgb, [x0, #16]
/* load sha1_ce_state::finalize */
@@ -144,7 +144,7 @@ CPU_LE( rev32 v11.16b, v11.16b )
b 1b
/* store new state */
-3: str dga, [x0]
+3: st1 {dgav.4s}, [x0]
str dgb, [x0, #16]
ret
ENDPROC(sha1_ce_transform)
--
2.7.4
^ permalink raw reply related
* [PATCH 2/6] crypto: arm64/ghash-ce - fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
To: linux-crypto, linux-arm-kernel, herbert
Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>
The GHASH key and digest are both pairs of 64-bit quantities, but the
GHASH code does not always refer to them as such, causing failures when
built for big endian. So replace the 16x1 loads and stores with 2x8 ones.
Fixes: b913a6404ce2 ("arm64/crypto: improve performance of GHASH algorithm")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/ghash-ce-core.S | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index dc457015884e..f0bb9f0b524f 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -29,8 +29,8 @@
* struct ghash_key const *k, const char *head)
*/
ENTRY(pmull_ghash_update)
- ld1 {SHASH.16b}, [x3]
- ld1 {XL.16b}, [x1]
+ ld1 {SHASH.2d}, [x3]
+ ld1 {XL.2d}, [x1]
movi MASK.16b, #0xe1
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
shl MASK.2d, MASK.2d, #57
@@ -74,6 +74,6 @@ CPU_LE( rev64 T1.16b, T1.16b )
cbnz w0, 0b
- st1 {XL.16b}, [x1]
+ st1 {XL.2d}, [x1]
ret
ENDPROC(pmull_ghash_update)
--
2.7.4
^ permalink raw reply related
* [PATCH 1/6] crypto: arm64/aes-ce - fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
To: linux-crypto, linux-arm-kernel, herbert
Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>
The core AES cipher implementation that uses ARMv8 Crypto Extensions
instructions erroneously loads the round keys as 64-bit quantities,
which causes the algorithm to fail when built for big endian. In
addition, the key schedule generation routine fails to take endianness
into account as well when combining the input key with
the round constants. So fix both issues.
Fixes: 12ac3efe74f8 ("arm64/crypto: use crypto instructions to generate AES key schedule")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/aes-ce-cipher.c | 25 ++++++++++++--------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
index f7bd9bf0bbb3..50d9fe11d0c8 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -47,24 +47,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
kernel_neon_begin_partial(4);
__asm__(" ld1 {v0.16b}, %[in] ;"
- " ld1 {v1.2d}, [%[key]], #16 ;"
+ " ld1 {v1.16b}, [%[key]], #16 ;"
" cmp %w[rounds], #10 ;"
" bmi 0f ;"
" bne 3f ;"
" mov v3.16b, v1.16b ;"
" b 2f ;"
"0: mov v2.16b, v1.16b ;"
- " ld1 {v3.2d}, [%[key]], #16 ;"
+ " ld1 {v3.16b}, [%[key]], #16 ;"
"1: aese v0.16b, v2.16b ;"
" aesmc v0.16b, v0.16b ;"
- "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ "2: ld1 {v1.16b}, [%[key]], #16 ;"
" aese v0.16b, v3.16b ;"
" aesmc v0.16b, v0.16b ;"
- "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ "3: ld1 {v2.16b}, [%[key]], #16 ;"
" subs %w[rounds], %w[rounds], #3 ;"
" aese v0.16b, v1.16b ;"
" aesmc v0.16b, v0.16b ;"
- " ld1 {v3.2d}, [%[key]], #16 ;"
+ " ld1 {v3.16b}, [%[key]], #16 ;"
" bpl 1b ;"
" aese v0.16b, v2.16b ;"
" eor v0.16b, v0.16b, v3.16b ;"
@@ -92,24 +92,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
kernel_neon_begin_partial(4);
__asm__(" ld1 {v0.16b}, %[in] ;"
- " ld1 {v1.2d}, [%[key]], #16 ;"
+ " ld1 {v1.16b}, [%[key]], #16 ;"
" cmp %w[rounds], #10 ;"
" bmi 0f ;"
" bne 3f ;"
" mov v3.16b, v1.16b ;"
" b 2f ;"
"0: mov v2.16b, v1.16b ;"
- " ld1 {v3.2d}, [%[key]], #16 ;"
+ " ld1 {v3.16b}, [%[key]], #16 ;"
"1: aesd v0.16b, v2.16b ;"
" aesimc v0.16b, v0.16b ;"
- "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ "2: ld1 {v1.16b}, [%[key]], #16 ;"
" aesd v0.16b, v3.16b ;"
" aesimc v0.16b, v0.16b ;"
- "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ "3: ld1 {v2.16b}, [%[key]], #16 ;"
" subs %w[rounds], %w[rounds], #3 ;"
" aesd v0.16b, v1.16b ;"
" aesimc v0.16b, v0.16b ;"
- " ld1 {v3.2d}, [%[key]], #16 ;"
+ " ld1 {v3.16b}, [%[key]], #16 ;"
" bpl 1b ;"
" aesd v0.16b, v2.16b ;"
" eor v0.16b, v0.16b, v3.16b ;"
@@ -173,7 +173,12 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
u32 *rki = ctx->key_enc + (i * kwords);
u32 *rko = rki + kwords;
+#ifndef CONFIG_CPU_BIG_ENDIAN
rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+#else
+ rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
+ rki[0];
+#endif
rko[1] = rko[0] ^ rki[1];
rko[2] = rko[1] ^ rki[2];
rko[3] = rko[2] ^ rki[3];
--
2.7.4
^ permalink raw reply related
* [PATCH 0/6] crypto: arm64 - big endian fixes
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
To: linux-crypto, linux-arm-kernel, herbert
Cc: catalin.marinas, will.deacon, Ard Biesheuvel
As it turns out, none of the accelerated crypto routines under arch/arm64/crypto
currently work, or have ever worked correctly when built for big endian. So this
series fixes all of them.
Each of these patches carries a fixes tag, and could be backported to stable.
However, for patches #1 and #5, the fixes tag denotes the oldest commit that the
fix is compatible with, not the patch that introduced the algorithm. This is due
to the fact that the key schedules are incompatible between generic AES and the
arm64 Crypto Extensions implementation (but only when building for big endian).
This is not a problem in practice, but it does mean that the AES-CCM and AES in
ECB/CBC/CTR/XTS mode implementations before v3.19 require a different fix, i.e.,
one that is compatible with the generic AES key schedule generation code (which
they currently no longer use).
In any case, please apply with cc to stable.
Ard Biesheuvel (6):
crypto: arm64/aes-ce - fix for big endian
crypto: arm64/ghash-ce - fix for big endian
crypto: arm64/sha1-ce - fix for big endian
crypto: arm64/sha2-ce - fix for big endian
crypto: arm64/aes-ccm-ce: fix for big endian
crypto: arm64/aes-neon - fix for big endian
arch/arm64/crypto/aes-ce-ccm-core.S | 53 ++++++++++----------
arch/arm64/crypto/aes-ce-cipher.c | 25 +++++----
arch/arm64/crypto/aes-neon.S | 25 +++++----
arch/arm64/crypto/ghash-ce-core.S | 6 +--
arch/arm64/crypto/sha1-ce-core.S | 4 +-
arch/arm64/crypto/sha2-ce-core.S | 4 +-
6 files changed, 64 insertions(+), 53 deletions(-)
--
2.7.4
^ permalink raw reply
* chacha counters in random.c
From: Sami Farin @ 2016-10-09 17:16 UTC (permalink / raw)
To: Theodore Ts'o; +Cc: linux-crypto
commit e192be9d9a30555aae2ca1dc3aad37cba484cd4a
+ chacha20_block(&crng->state[0], out);
+ if (crng->state[12] == 0)
+ crng->state[13]++;
Did you mean
+ if (++crng->state[12] == 0)
?
--
Do what you love because life is too short for anything else.
https://samifar.in/
^ permalink raw reply
* Re: Observed a ecryptFS crash
From: xiakaixu @ 2016-10-09 7:12 UTC (permalink / raw)
To: tyhicks@canonical.com
Cc: liushuoran, linux-crypto@vger.kernel.org,
ecryptfs@vger.kernel.org, linux-kernel@vger.kernel.org,
Yaodongdong, Wangbintian, yingjindong, Yezongbo, likan (A)
In-Reply-To: <00B10D30F2BAA743B48953A4D86C96D54CB653@SZXEMI506-MBS.china.huawei.com>
ping...
> Hi Tyhicks,
>
> We observed an ecryptFS crash occasionally in Linux kernel 4.1.18. The call trace is attached below. Is it a known issue? Look forward to hearing from you. Thanks in advance!
>
> [19314.529479s][pid:2694,cpu3,GAC_Executor[0]]Call trace:
> [19314.529510s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0000f3898>] do_raw_spin_lock+0x20/0x200
> [19314.529510s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc001031fb0>] _raw_spin_lock+0x28/0x34
> [19314.529541s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0003908e0>] selinux_inode_free_security+0x3c/0x94
> [19314.529541s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000386b04>] security_inode_free+0x2c/0x38
> [19314.529541s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0001fff88>] __destroy_inode+0x2c/0x180
> [19314.529571s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000201660>] destroy_inode+0x30/0xa0
> [19314.529571s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0002017d8>] evict+0x108/0x1c0
> [19314.529571s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000202588>] iput+0x184/0x258
> [19314.529602s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0002f27d0>] ecryptfs_evict_inode+0x30/0x3c
> [19314.529602s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc00020177c>] evict+0xac/0x1c0
> [19314.529602s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0002018d4>] dispose_list+0x44/0x5c
> [19314.529632s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000202d28>] evict_inodes+0xcc/0x12c
> [19314.529632s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0001e5d04>] generic_shutdown_super+0x58/0xe4
> [19314.529632s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0001e7164>] kill_anon_super+0x30/0x74
> [19314.529663s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0002f16a4>] ecryptfs_kill_block_super+0x24/0x54
> [19314.529663s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0001e6690>] deactivate_locked_super+0x60/0x8c
> [19314.529663s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0001e6754>] deactivate_super+0x98/0xa4
> [19314.529693s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000206d54>] cleanup_mnt+0x50/0xd0
> [19314.529693s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000206e48>] __cleanup_mnt+0x20/0x2c
> [19314.529693s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0000c1bac>] task_work_run+0xbc/0xf8
> [19314.529724s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0000a3e98>] do_exit+0x2d4/0xa14
> [19314.529724s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0000a56a8>] do_group_exit+0x60/0xf8
> [19314.529724s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0000b26ac>] get_signal+0x284/0x598
> [19314.529754s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc00008950c>] do_signal+0x170/0x5b8
> [19314.529754s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000089be0>] do_notify_resume+0x70/0x78
> [19314.529785s][pid:2694,cpu3,GAC_Executor[0]]Code: aa0003f3 aa1e03e0 97fe7718 5289d5a0 (b9400661)
> [19314.529907s][pid:2694,cpu3,GAC_Executor[0]]---[ end trace 382e4b6264b035b5 ]---
> [19314.529907s][pid:2694,cpu3,GAC_Executor[0]]Kernel panic - not syncing: Fatal exception
>
> Regards,
> Shuoran
>
^ permalink raw reply
* Re: PEACE BE WITH YOU,
From: contact @ 2016-10-06 4:37 UTC (permalink / raw)
To: Recipients
Subject: Can I Trust You In This5 Days Project ?
Email ( dr.kpyj1958@gmail.com )
Dear Friend
My name is ATTORNEY KONO PETER , I really do not mean to waste your time. Considering the fact that this is a £36,000,000.00 British Pounds.deal shear rate 50/50 % and it's bank to bank wire trasaction within Five workind days.
I carefully contact you due to many Internet frauds nowadays.but i put my faith in God because all things in life is by risk but don't let me down now or after, This is Mr.Plaviashakunthala Lobo and his family was involved in plan crash 22nd of May 2010 in List of passengers on Air India Express flight that crash 32.Plaviashakunthala Lobo 33. Venishanikola Lobo
34. Vishalfloid Lobo (child) and all family died without any inheritance or next of kin so i want you to work together with me been an attorney so this is Mr.Plaviashakunthala Lobo account details.
Bank name: Bank of Africa
Bank Address: Cotonou Benin Republic
Account name: Plavia shakunthala Lobo
Account Number: 1103-8022-1351
Account Balance: £36,000,000.00 British Pounds (GBP)
Date of deposit: 19th December, 2009
Account officer : Bashirudeen Hussam
I was with Mr.Plaviashakunthala Lobo as a legal witness when this money was deposited as fixed deposit in 2009. Since his demise, I have visited this bank three times. Contact the bank and ask for the confirmation of his involvement in the plane crash.check the website: http://www.thehindu.com/news/national/list-of-passengers-on-air-india-express-flight/article435569.ece
Thanks
ATTORNEY KONO PETER
------------------------------------------
Disclaimer: This message transmitted with it are confidential and privileged. If you have received it in error, please notify the sender by return e-mail and delete this message from your system. If you are not the intended recipient you are hereby notified that any dissemination, copy or disclosure of this e-mail is strictly prohibited.
---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus
^ permalink raw reply
* Re: [PATCH] crypto: caam: add support for iMX6UL
From: Rob Herring @ 2016-10-09 1:29 UTC (permalink / raw)
To: Marcus Folkesson
Cc: herbert, davem, mark.rutland, horia.geanta, tudor-dan.ambarus,
alexandru.porosanu, arnd, linux-crypto, devicetree, linux-kernel
In-Reply-To: <20161004133259.GA30071@gmail.com>
On Tue, Oct 04, 2016 at 09:32:59AM -0400, Marcus Folkesson wrote:
> i.MX6UL requires only three clocks to enable the CAAM module.
>
> Signed-off-by: Marcus Folkesson <marcus.folkesson@gmail.com>
> ---
> .../devicetree/bindings/crypto/fsl-sec4.txt | 20 +++++++++++++
Acked-by: Rob Herring <robh@kernel.org>
> drivers/crypto/caam/ctrl.c | 35 ++++++++++++----------
> 2 files changed, 40 insertions(+), 15 deletions(-)
^ permalink raw reply
* [PATCH] crypto: api - Remove no-op exit_ops code
From: Eric Biggers @ 2016-10-07 21:13 UTC (permalink / raw)
To: herbert, davem; +Cc: linux-crypto, Eric Biggers
crypto_exit_cipher_ops() and crypto_exit_compress_ops() are no-ops and
have been for a long time, so remove them.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
crypto/api.c | 20 ++------------------
crypto/cipher.c | 4 ----
crypto/compress.c | 4 ----
crypto/internal.h | 3 ---
4 files changed, 2 insertions(+), 29 deletions(-)
diff --git a/crypto/api.c b/crypto/api.c
index bbc147c..a88729f 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -310,24 +310,8 @@ static void crypto_exit_ops(struct crypto_tfm *tfm)
{
const struct crypto_type *type = tfm->__crt_alg->cra_type;
- if (type) {
- if (tfm->exit)
- tfm->exit(tfm);
- return;
- }
-
- switch (crypto_tfm_alg_type(tfm)) {
- case CRYPTO_ALG_TYPE_CIPHER:
- crypto_exit_cipher_ops(tfm);
- break;
-
- case CRYPTO_ALG_TYPE_COMPRESS:
- crypto_exit_compress_ops(tfm);
- break;
-
- default:
- BUG();
- }
+ if (type && tfm->exit)
+ tfm->exit(tfm);
}
static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask)
diff --git a/crypto/cipher.c b/crypto/cipher.c
index 39541e0..94fa355 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -116,7 +116,3 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm)
return 0;
}
-
-void crypto_exit_cipher_ops(struct crypto_tfm *tfm)
-{
-}
diff --git a/crypto/compress.c b/crypto/compress.c
index c33f076..f2d5229 100644
--- a/crypto/compress.c
+++ b/crypto/compress.c
@@ -42,7 +42,3 @@ int crypto_init_compress_ops(struct crypto_tfm *tfm)
return 0;
}
-
-void crypto_exit_compress_ops(struct crypto_tfm *tfm)
-{
-}
diff --git a/crypto/internal.h b/crypto/internal.h
index 7eefcdb..f073204 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -76,9 +76,6 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask);
int crypto_init_cipher_ops(struct crypto_tfm *tfm);
int crypto_init_compress_ops(struct crypto_tfm *tfm);
-void crypto_exit_cipher_ops(struct crypto_tfm *tfm);
-void crypto_exit_compress_ops(struct crypto_tfm *tfm);
-
struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask);
void crypto_larval_kill(struct crypto_alg *alg);
struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask);
--
2.8.0.rc3.226.g39d4020
^ permalink raw reply related
* [PATCH] crypto: skcipher - Remove unused crypto_lookup_skcipher() declaration
From: Eric Biggers @ 2016-10-07 21:13 UTC (permalink / raw)
To: herbert, davem; +Cc: linux-crypto, Eric Biggers
The definition of crypto_lookup_skcipher() was already removed in
commit 3a01d0ee2b99 ("crypto: skcipher - Remove top-level givcipher
interface"). So the declaration should be removed too.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
include/crypto/internal/skcipher.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index a21a95e..95d2a18 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -74,8 +74,6 @@ static inline int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn,
return crypto_grab_skcipher(spawn, name, type, mask);
}
-struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask);
-
static inline void crypto_drop_skcipher(struct crypto_skcipher_spawn *spawn)
{
crypto_drop_spawn(&spawn->base);
--
2.8.0.rc3.226.g39d4020
^ permalink raw reply related
* [PATCH] crypto: crypto4xx - Fix size used in dma_free_coherent()
From: Christophe JAILLET @ 2016-10-07 20:36 UTC (permalink / raw)
To: herbert, davem
Cc: linux-crypto, linux-kernel, kernel-janitors, Christophe JAILLET
The size used in 'dma_free_coherent()' looks un-initialized here.
ctx->sa_len is set a few lines below and is apparently not set by the
caller.
So use 'size' as in the corresponding 'dma_alloc_coherent()' a few lines
above.
This has been spotted with coccinelle, using the following script:
////////////////////
@r@
expression x0, x1, y0, y1, z0, z1, t0, t1, ret;
@@
* ret = dma_alloc_coherent(x0, y0, z0, t0);
...
* dma_free_coherent(x1, y1, ret, t1);
@script:python@
y0 << r.y0;
y1 << r.y1;
@@
if y1.find(y0) == -1:
print "WARNING: sizes look different: '%s' vs '%s'" % (y0, y1)
////////////////////
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
---
drivers/crypto/amcc/crypto4xx_core.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index dae1e39139e9..d10b4ae5e0da 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -135,8 +135,7 @@ int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size)
ctx->sa_out = dma_alloc_coherent(ctx->dev->core_dev->device, size * 4,
&ctx->sa_out_dma_addr, GFP_ATOMIC);
if (ctx->sa_out == NULL) {
- dma_free_coherent(ctx->dev->core_dev->device,
- ctx->sa_len * 4,
+ dma_free_coherent(ctx->dev->core_dev->device, size * 4,
ctx->sa_in, ctx->sa_in_dma_addr);
return -ENOMEM;
}
--
2.7.4
^ permalink raw reply related
* Re: [PATCH] padata: add helper function for queue length
From: Steffen Klassert @ 2016-10-07 3:15 UTC (permalink / raw)
To: Jason A. Donenfeld; +Cc: linux-crypto, linux-kernel
In-Reply-To: <20161002014638.8049-1-Jason@zx2c4.com>
On Sun, Oct 02, 2016 at 03:46:38AM +0200, Jason A. Donenfeld wrote:
> Since padata has a maximum number of inflight jobs, currently 1000, it's
> very useful to know how many jobs are currently queued up. This adds a
> simple helper function to expose this information.
>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
> include/linux/padata.h | 2 ++
> kernel/padata.c | 16 ++++++++++++++++
> 2 files changed, 18 insertions(+)
>
> diff --git a/include/linux/padata.h b/include/linux/padata.h
> index 113ee62..4840ae4 100644
> --- a/include/linux/padata.h
> +++ b/include/linux/padata.h
> @@ -3,6 +3,7 @@
> *
> * Copyright (C) 2008, 2009 secunet Security Networks AG
> * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
> + * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>
> *
> * This program is free software; you can redistribute it and/or modify it
> * under the terms and conditions of the GNU General Public License,
> @@ -181,4 +182,5 @@ extern int padata_register_cpumask_notifier(struct padata_instance *pinst,
> struct notifier_block *nblock);
> extern int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
> struct notifier_block *nblock);
> +extern int padata_queue_len(struct padata_instance *pinst);
> #endif
> diff --git a/kernel/padata.c b/kernel/padata.c
> index 9932788..17c1e08 100644
> --- a/kernel/padata.c
> +++ b/kernel/padata.c
> @@ -5,6 +5,7 @@
> *
> * Copyright (C) 2008, 2009 secunet Security Networks AG
> * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
> + * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>
> *
> * This program is free software; you can redistribute it and/or modify it
> * under the terms and conditions of the GNU General Public License,
> @@ -1039,3 +1040,18 @@ void padata_free(struct padata_instance *pinst)
> kobject_put(&pinst->kobj);
> }
> EXPORT_SYMBOL(padata_free);
> +
> +/**
> + * padata_queue_len - retreive the number of in progress jobs
> + *
> + * @padata_inst: padata instance from which to read the queue size
> + */
> +int padata_queue_len(struct padata_instance *pinst)
> +{
> + int len;
> + rcu_read_lock_bh();
> + len = atomic_read(&rcu_dereference_bh(pinst->pd)->refcnt);
> + rcu_read_unlock_bh();
> + return len;
> +}
> +EXPORT_SYMBOL(padata_queue_len);
Why do you want to have this? Without a user of this function,
there is no point in adding it.
^ permalink raw reply
* Re: [PATCH] crypto: caam: add support for iMX6UL
From: Horia Geanta Neag @ 2016-10-06 7:12 UTC (permalink / raw)
To: Marcus Folkesson, herbert@gondor.apana.org.au,
davem@davemloft.net, robh+dt@kernel.org, mark.rutland@arm.com,
Tudor-Dan Ambarus, Alexandru Porosanu, arnd@arndb.de
Cc: linux-crypto@vger.kernel.org, devicetree@vger.kernel.org,
linux-kernel@vger.kernel.org
In-Reply-To: <20161004133259.GA30071@gmail.com>
On 10/4/2016 10:33 AM, Marcus Folkesson wrote:
> i.MX6UL does only require three clocks to enable CAAM module.
>
> Signed-off-by: Marcus Folkesson <marcus.folkesson@gmail.com>
Reviewed-by: Horia Geantă <horia.geanta@nxp.com>
Thanks,
Horia
^ permalink raw reply
* Re: [PATCH v3 0/8] Add support for SafeXcel IP-76 to OMAP RNG
From: Matthijs van Duin @ 2016-10-05 20:37 UTC (permalink / raw)
To: Romain Perier
Cc: dsaxena, mpm, Herbert Xu, Gregory Clement, Thomas Petazzoni,
Nadav Haklai, Omri Itach, Shadi Ammouri, Yahuda Yitschak,
Hanna Hawa, Neta Zur Hershkovits, Igal Liberman, Marcin Wojtas,
linux-crypto, linux-omap@vger.kernel.org
In-Reply-To: <57E8F1F1.6040906@free-electrons.com>
>>> The driver omap-rng has a lot of similarity with the IP block SafeXcel
>>> IP-76. A lot of registers are the same and the way that the driver works
>>> is very closed the description of the TRNG EIP76 in its datasheet.
Specifically the omap rng, at least the version I've examined, is a
SafeXcel EIP-75a core with a TI wrapper according to my notes. I don't
remember anymore where I obtained the "a", but the peripheral does
identify itself as EIP-75 v2.0.0 in the register at offset 0x7c.
Matthijs
^ permalink raw reply
* [PATCH v4 0/2] Improve DMA chaining for ahash requests
From: Romain Perier @ 2016-10-05 7:56 UTC (permalink / raw)
To: Boris Brezillon, Arnaud Ebalard
Cc: David S. Miller, Herbert Xu, Thomas Petazzoni, Jason Cooper,
Andrew Lunn, Sebastian Hesselbarth, Gregory Clement, Nadav Haklai,
Ofer Heifetz, linux-crypto, linux-arm-kernel
This series contains a performance improvement for ahash requests.
So far, ahash requests were systematically not chained at the DMA level.
However, in some cases, such as when using IPSec, some ahash
requests can be processed directly by the engine and don't have
intermediate partial update states.
This series first reworks the way outer IVs are copied from the SRAM
into the dma pool. To do so, we introduce a common dma pool for all types
of requests that contains outer results (like IV or digest). Then, for
ahash requests that can be processed directly by the engine, outer
results are copied from the SRAM into the common dma pool. These requests
are then allowed to be chained at the DMA level.
Benchmarking results with iperf through IPSec
=============================================
ESP AH
Before 343 Mbits/s 492 Mbits/s
After 422 Mbits/s 577 Mbits/s
Improvement +23% +17%
Romain Perier (2):
crypto: marvell - Use an unique pool to copy results of requests
crypto: marvell - Don't break chain for computable last ahash requests
drivers/crypto/marvell/cesa.c | 4 ---
drivers/crypto/marvell/cesa.h | 5 ++--
drivers/crypto/marvell/cipher.c | 8 +++--
drivers/crypto/marvell/hash.c | 65 +++++++++++++++++++++++++++++++----------
drivers/crypto/marvell/tdma.c | 28 +++++++++---------
5 files changed, 70 insertions(+), 40 deletions(-)
--
2.9.3
^ permalink raw reply
* [PATCH v4 2/2] crypto: marvell - Don't break chain for computable last ahash requests
From: Romain Perier @ 2016-10-05 7:56 UTC (permalink / raw)
To: Boris Brezillon, Arnaud Ebalard
Cc: David S. Miller, Herbert Xu, Thomas Petazzoni, Jason Cooper,
Andrew Lunn, Sebastian Hesselbarth, Gregory Clement, Nadav Haklai,
Ofer Heifetz, linux-crypto, linux-arm-kernel
In-Reply-To: <20161005075633.12711-1-romain.perier@free-electrons.com>
Currently, the driver breaks the chain for all kinds of hash requests in order
not to overwrite intermediate states of partial ahash updates. However, some final
ahash requests can be processed directly by the engine, and so have no
intermediate state. This is typically the case for most of the HMAC requests
processed via IPSec.
This commit adds a TDMA descriptor to copy the context for these kinds of requests
into the "op" dma pool, which then allows these requests to be chained at the DMA level.
The 'complete' operation is also updated to retrieve the MAC digest from the
right location.
Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
---
Changes in v4:
- Remove the dummy descriptor at the end of the chain, when a TDMA_RESULT
is present. So we rewrote the code of ahash_complete a bit accordingly.
Changes in v3:
- Copy the whole context back to RAM and not just the digest. Also
fixed a rebase issue ^^ (whoops)
Changes in v2:
- Replaced BUG_ON by an error
- Add a variable "break_chain", with "type" to break the chain
drivers/crypto/marvell/hash.c | 65 ++++++++++++++++++++++++++++++++-----------
1 file changed, 49 insertions(+), 16 deletions(-)
diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index 9f28468..2a92605 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -312,24 +312,40 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
int i;
digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
- for (i = 0; i < digsize / 4; i++)
- creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i));
- if (creq->last_req) {
+ if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ &&
+ (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT) {
+ __le32 *data = NULL;
+
/*
- * Hardware's MD5 digest is in little endian format, but
- * SHA in big endian format
+ * Result is already in the correct endianess when the SA is
+ * used
*/
- if (creq->algo_le) {
- __le32 *result = (void *)ahashreq->result;
+ data = creq->base.chain.last->op->ctx.hash.hash;
+ for (i = 0; i < digsize / 4; i++)
+ creq->state[i] = cpu_to_le32(data[i]);
- for (i = 0; i < digsize / 4; i++)
- result[i] = cpu_to_le32(creq->state[i]);
- } else {
- __be32 *result = (void *)ahashreq->result;
+ memcpy(ahashreq->result, data, digsize);
+ } else {
+ for (i = 0; i < digsize / 4; i++)
+ creq->state[i] = readl_relaxed(engine->regs +
+ CESA_IVDIG(i));
+ if (creq->last_req) {
+ /*
+ * Hardware's MD5 digest is in little endian format, but
+ * SHA in big endian format
+ */
+ if (creq->algo_le) {
+ __le32 *result = (void *)ahashreq->result;
+
+ for (i = 0; i < digsize / 4; i++)
+ result[i] = cpu_to_le32(creq->state[i]);
+ } else {
+ __be32 *result = (void *)ahashreq->result;
- for (i = 0; i < digsize / 4; i++)
- result[i] = cpu_to_be32(creq->state[i]);
+ for (i = 0; i < digsize / 4; i++)
+ result[i] = cpu_to_be32(creq->state[i]);
+ }
}
}
@@ -504,6 +520,12 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
CESA_SA_DESC_CFG_LAST_FRAG,
CESA_SA_DESC_CFG_FRAG_MSK);
+ ret = mv_cesa_dma_add_result_op(chain,
+ CESA_SA_CFG_SRAM_OFFSET,
+ CESA_SA_DATA_SRAM_OFFSET,
+ CESA_TDMA_SRC_IN_SRAM, flags);
+ if (ret)
+ return ERR_PTR(-ENOMEM);
return op;
}
@@ -564,6 +586,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
struct mv_cesa_op_ctx *op = NULL;
unsigned int frag_len;
int ret;
+ u32 type;
basereq->chain.first = NULL;
basereq->chain.last = NULL;
@@ -635,7 +658,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
goto err_free_tdma;
}
- if (op) {
+ /*
+ * If results are copied via DMA, this means that this
+ * request can be directly processed by the engine,
+ * without partial updates. So we can chain it at the
+ * DMA level with other requests.
+ */
+ type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK;
+
+ if (op && type != CESA_TDMA_RESULT) {
/* Add dummy desc to wait for crypto operation end */
ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags);
if (ret)
@@ -648,8 +679,10 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
else
creq->cache_ptr = 0;
- basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ |
- CESA_TDMA_BREAK_CHAIN);
+ basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ;
+
+ if (type != CESA_TDMA_RESULT)
+ basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;
return 0;
--
2.9.3
^ permalink raw reply related
* [PATCH v4 1/2] crypto: marvell - Use an unique pool to copy results of requests
From: Romain Perier @ 2016-10-05 7:56 UTC (permalink / raw)
To: Boris Brezillon, Arnaud Ebalard
Cc: David S. Miller, Herbert Xu, Thomas Petazzoni, Jason Cooper,
Andrew Lunn, Sebastian Hesselbarth, Gregory Clement, Nadav Haklai,
Ofer Heifetz, linux-crypto, linux-arm-kernel
In-Reply-To: <20161005075633.12711-1-romain.perier@free-electrons.com>
So far, we used a dedicated dma pool to copy the result of outer IV for
cipher requests. Instead of using a dma pool per outer data, we prefer
to use the op dma pool that contains all parts of the request from the SRAM.
Then, the outer data that is likely to be used by the 'complete'
operation is copied later. In this way, any type of result can be
retrieved by DMA for cipher or ahash requests.
Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
Changes in v4:
- Added a comment that explains why we retrieve the first op ctx
of the chain.
- Added the tag 'Acked-by'
Changes in v3:
- Don't allocate a new op ctx for the last tdma descriptor. Instead
we point to the last op ctx in the tdma chain, and copy the context
of the current request to this location.
Changes in v2:
- Use the dma pool "op" to retrieve outer data instead of introducing
a new one.
drivers/crypto/marvell/cesa.c | 4 ----
drivers/crypto/marvell/cesa.h | 5 ++---
drivers/crypto/marvell/cipher.c | 8 +++++---
drivers/crypto/marvell/tdma.c | 33 +++++++++++++++++++--------------
4 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
index 37dadb2..6e7a5c7 100644
--- a/drivers/crypto/marvell/cesa.c
+++ b/drivers/crypto/marvell/cesa.c
@@ -375,10 +375,6 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa)
if (!dma->padding_pool)
return -ENOMEM;
- dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0);
- if (!dma->iv_pool)
- return -ENOMEM;
-
cesa->dma = dma;
return 0;
diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h
index e423d33..a768da7 100644
--- a/drivers/crypto/marvell/cesa.h
+++ b/drivers/crypto/marvell/cesa.h
@@ -277,7 +277,7 @@ struct mv_cesa_op_ctx {
#define CESA_TDMA_DUMMY 0
#define CESA_TDMA_DATA 1
#define CESA_TDMA_OP 2
-#define CESA_TDMA_IV 3
+#define CESA_TDMA_RESULT 3
/**
* struct mv_cesa_tdma_desc - TDMA descriptor
@@ -393,7 +393,6 @@ struct mv_cesa_dev_dma {
struct dma_pool *op_pool;
struct dma_pool *cache_pool;
struct dma_pool *padding_pool;
- struct dma_pool *iv_pool;
};
/**
@@ -839,7 +838,7 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain)
memset(chain, 0, sizeof(*chain));
}
-int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
+int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
u32 size, u32 flags, gfp_t gfp_flags);
struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c
index d19dc96..098871a 100644
--- a/drivers/crypto/marvell/cipher.c
+++ b/drivers/crypto/marvell/cipher.c
@@ -212,7 +212,8 @@ mv_cesa_ablkcipher_complete(struct crypto_async_request *req)
struct mv_cesa_req *basereq;
basereq = &creq->base;
- memcpy(ablkreq->info, basereq->chain.last->data, ivsize);
+ memcpy(ablkreq->info, basereq->chain.last->op->ctx.blkcipher.iv,
+ ivsize);
} else {
memcpy_fromio(ablkreq->info,
engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET,
@@ -373,8 +374,9 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req,
/* Add output data for IV */
ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req));
- ret = mv_cesa_dma_add_iv_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET,
- ivsize, CESA_TDMA_SRC_IN_SRAM, flags);
+ ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CFG_SRAM_OFFSET,
+ CESA_SA_DATA_SRAM_OFFSET,
+ CESA_TDMA_SRC_IN_SRAM, flags);
if (ret)
goto err_free_tdma;
diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c
index 9fd7a5f..4416b88 100644
--- a/drivers/crypto/marvell/tdma.c
+++ b/drivers/crypto/marvell/tdma.c
@@ -69,9 +69,6 @@ void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq)
if (type == CESA_TDMA_OP)
dma_pool_free(cesa_dev->dma->op_pool, tdma->op,
le32_to_cpu(tdma->src));
- else if (type == CESA_TDMA_IV)
- dma_pool_free(cesa_dev->dma->iv_pool, tdma->data,
- le32_to_cpu(tdma->dst));
tdma = tdma->next;
dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma,
@@ -209,29 +206,37 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags)
return new_tdma;
}
-int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
+int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
u32 size, u32 flags, gfp_t gfp_flags)
{
-
- struct mv_cesa_tdma_desc *tdma;
- u8 *iv;
- dma_addr_t dma_handle;
+ struct mv_cesa_tdma_desc *tdma, *op_desc;
tdma = mv_cesa_dma_add_desc(chain, gfp_flags);
if (IS_ERR(tdma))
return PTR_ERR(tdma);
- iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle);
- if (!iv)
- return -ENOMEM;
+ /* We re-use an existing op_desc object to retrieve the context
+ * and result instead of allocating a new one.
+ * There is at least one object of this type in a CESA crypto
+ * req, just pick the first one in the chain.
+ */
+ for (op_desc = chain->first; op_desc; op_desc = op_desc->next) {
+ u32 type = op_desc->flags & CESA_TDMA_TYPE_MSK;
+
+ if (type == CESA_TDMA_OP)
+ break;
+ }
+
+ if (!op_desc)
+ return -EIO;
tdma->byte_cnt = cpu_to_le32(size | BIT(31));
tdma->src = src;
- tdma->dst = cpu_to_le32(dma_handle);
- tdma->data = iv;
+ tdma->dst = op_desc->src;
+ tdma->op = op_desc->op;
flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM);
- tdma->flags = flags | CESA_TDMA_IV;
+ tdma->flags = flags | CESA_TDMA_RESULT;
return 0;
}
--
2.9.3
^ permalink raw reply related
* [PATCH] Fix Kconfig dependencies for FIPS
From: Alec Ari @ 2016-10-04 22:34 UTC (permalink / raw)
To: linux-crypto
Currently FIPS depends on MODULE_SIG, even if MODULES is disabled.
This change allows the enabling of FIPS without support for modules.
If module loading support is enabled, only then does
FIPS require MODULE_SIG.
Signed-off-by: Alec Ari <neotheuser@gmail.com>
---
crypto/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 84d7148..fd28805 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -24,7 +24,7 @@ comment "Crypto core or helper"
config CRYPTO_FIPS
bool "FIPS 200 compliance"
depends on (CRYPTO_ANSI_CPRNG || CRYPTO_DRBG) &&
!CRYPTO_MANAGER_DISABLE_TESTS
- depends on MODULE_SIG
+ depends on (MODULE_SIG || !MODULES)
help
This options enables the fips boot option which is
required if you want to system to operate in a FIPS 200
--
2.7.3
^ permalink raw reply related
* Re: sha1_mb broken
From: Stephan Mueller @ 2016-10-04 16:27 UTC (permalink / raw)
To: Tim Chen; +Cc: Dey, Megha, linux-crypto@vger.kernel.org
In-Reply-To: <1475597322.3916.283.camel@linux.intel.com>
Am Dienstag, 4. Oktober 2016, 09:08:42 CEST schrieb Tim Chen:
Hi Tim,
> There is a spin lock protecting the completion's wait_queue on the processes
> waiting for the completion of the job, and the queue head. My suspicion is
> if these structures are not initialized properly, we fail to look up the
> waiting process in the queue properly to call it. For the other tested
> cases, they may not be a true ahash operation in the sense of passing
> request through the crypto daemon, and have to context switch to let crypto
> daemon complete the job. The computation proceeds
> and returns in the same call chain.
Thanks a lot for the clarification.
Ciao
Stephan
^ permalink raw reply
* Re: sha1_mb broken
From: Tim Chen @ 2016-10-04 16:08 UTC (permalink / raw)
To: Stephan Mueller, Dey, Megha; +Cc: linux-crypto@vger.kernel.org
In-Reply-To: <2176107.gSz0A05ekE@tauon.atsec.com>
On Tue, 2016-10-04 at 16:10 +0200, Stephan Mueller wrote:
> Am Dienstag, 4. Oktober 2016, 00:25:07 CEST schrieb Dey, Megha:
>
> Hi Megha,
>
> >
> >
> > >
> > > Hi Stephan,
> > >
> > > Your test code initialized the completion structure incorrectly, that led
> > > to the missing completion from being received. The init_completion call
> > > should be made before the crypto_ahash_digest call. The following change
> Thanks a lot for pointing that one out. Can you help me understand why your
> code trips over that issue whereas other ahash implementations do not (all
> other SHA-1 or SHA-2 implementations work perfectly fine with that code)?
>
There is a spin lock protecting the completion's wait_queue on the processes waiting for
the completion of the job, and the queue head. My suspicion is if these
structures are not initialized properly, we fail to look up the waiting process in the queue
properly to call it. For the other tested cases, they may not be a true ahash operation
in the sense of passing request through the crypto daemon, and have to context switch
to let crypto daemon complete the job. The computation proceeds
and returns in the same call chain.
Thanks.
Tim
^ permalink raw reply
* Re: [PATCH v3 2/2] crypto: marvell - Don't break chain for computable last ahash requests
From: Boris Brezillon @ 2016-10-04 14:14 UTC (permalink / raw)
To: Romain Perier
Cc: Arnaud Ebalard, David S. Miller, Herbert Xu, Thomas Petazzoni,
Jason Cooper, Andrew Lunn, Sebastian Hesselbarth, Gregory Clement,
Nadav Haklai, Ofer Heifetz, linux-crypto, linux-arm-kernel
In-Reply-To: <20161004125720.3347-3-romain.perier@free-electrons.com>
On Tue, 4 Oct 2016 14:57:20 +0200
Romain Perier <romain.perier@free-electrons.com> wrote:
> Currently, the driver breaks chain for all kind of hash requests in order to
> don't override intermediate states of partial ahash updates. However, some final
> ahash requests can be directly processed by the engine, and so without
> intermediate state. This is typically the case for most for the HMAC requests
> processed via IPSec.
>
> This commits adds a TDMA descriptor to copy context for these of requests
> into the "op" dma pool, then it allow to chain these requests at the DMA level.
> The 'complete' operation is also updated to retrieve the MAC digest from the
> right location.
>
> Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
> ---
>
> Changes in v3:
> - Copy the whole context back to RAM and not just the digest. Also
> fixed a rebase issue ^^ (whoops)
>
> Changes in v2:
> - Replaced BUG_ON by an error
> - Add a variable "break_chain", with "type" to break the chain
>
> with ahash requests. It improves code readability.
> drivers/crypto/marvell/hash.c | 79 +++++++++++++++++++++++++++++++++++--------
> 1 file changed, 64 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
> index 9f28468..b36f196 100644
> --- a/drivers/crypto/marvell/hash.c
> +++ b/drivers/crypto/marvell/hash.c
> @@ -312,24 +312,53 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
> int i;
>
> digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
> - for (i = 0; i < digsize / 4; i++)
> - creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i));
>
> - if (creq->last_req) {
> + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ &&
> + !(creq->base.chain.last->flags & CESA_TDMA_BREAK_CHAIN)) {
> + struct mv_cesa_tdma_desc *tdma = NULL;
> + __le32 *data = NULL;
> +
> + for (tdma = creq->base.chain.first; tdma; tdma = tdma->next) {
> + u32 type = tdma->flags & CESA_TDMA_TYPE_MSK;
> + if (type == CESA_TDMA_RESULT)
> + break;
> + }
You should be able to drop the DUMMY desc at the end of the chain and
replace it by the RESULT desc. This way, you won't have to iterate over
the chain to find the TDMA_RESULT element: it should always be the last
desc in the chain.
> +
> + if (!tdma) {
> + dev_err(cesa_dev->dev, "Failed to retrieve tdma "
> + "descriptor for outer data\n");
> + return;
> + }
> +
> /*
> - * Hardware's MD5 digest is in little endian format, but
> - * SHA in big endian format
> + * Result is already in the correct endianess when the SA is
> + * used
> */
> - if (creq->algo_le) {
> - __le32 *result = (void *)ahashreq->result;
> + data = tdma->op->ctx.hash.hash;
> + for (i = 0; i < digsize / 4; i++)
> + creq->state[i] = cpu_to_le32(data[i]);
>
> - for (i = 0; i < digsize / 4; i++)
> - result[i] = cpu_to_le32(creq->state[i]);
> - } else {
> - __be32 *result = (void *)ahashreq->result;
> + memcpy(ahashreq->result, data, digsize);
> + } else {
> + for (i = 0; i < digsize / 4; i++)
> + creq->state[i] = readl_relaxed(engine->regs +
> + CESA_IVDIG(i));
> + if (creq->last_req) {
> + /*
> + * Hardware's MD5 digest is in little endian format, but
> + * SHA in big endian format
> + */
> + if (creq->algo_le) {
> + __le32 *result = (void *)ahashreq->result;
> +
> + for (i = 0; i < digsize / 4; i++)
> + result[i] = cpu_to_le32(creq->state[i]);
> + } else {
> + __be32 *result = (void *)ahashreq->result;
>
> - for (i = 0; i < digsize / 4; i++)
> - result[i] = cpu_to_be32(creq->state[i]);
> + for (i = 0; i < digsize / 4; i++)
> + result[i] = cpu_to_be32(creq->state[i]);
> + }
> }
> }
>
> @@ -504,6 +533,12 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
> CESA_SA_DESC_CFG_LAST_FRAG,
> CESA_SA_DESC_CFG_FRAG_MSK);
>
> + ret = mv_cesa_dma_add_result_op(chain,
> + CESA_SA_CFG_SRAM_OFFSET,
> + CESA_SA_DATA_SRAM_OFFSET,
> + CESA_TDMA_SRC_IN_SRAM, flags);
> + if (ret)
> + return ERR_PTR(-ENOMEM);
> return op;
> }
>
> @@ -564,6 +599,8 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
> struct mv_cesa_op_ctx *op = NULL;
> unsigned int frag_len;
> int ret;
> + u32 type;
> + bool break_chain = true;
>
> basereq->chain.first = NULL;
> basereq->chain.last = NULL;
> @@ -635,6 +672,16 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
> goto err_free_tdma;
> }
>
> + /*
> + * If results are copied via DMA, this means that this
> + * request can be directly processed by the engine,
> + * without partial updates. So we can chain it at the
> + * DMA level with other requests.
> + */
> + type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK;
> + if (type == CESA_TDMA_RESULT)
> + break_chain = false;
> +
> if (op) {
> /* Add dummy desc to wait for crypto operation end */
> ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags);
> @@ -648,8 +695,10 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
> else
> creq->cache_ptr = 0;
>
> - basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ |
> - CESA_TDMA_BREAK_CHAIN);
> + basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ;
> +
> + if (break_chain)
> + basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;
Not sure this break_chain variable is really needed. you can directly
test the type of the last element in the TDMA chain here and if it's
!= CESA_TDMA_RESULT, pass the CESA_TDMA_BREAK_CHAIN flag.
>
> return 0;
>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox