Linux cryptographic layer development
 help / color / mirror / Atom feed
* Re: [PATCH 0/3] Fix crypto/vmx/p8_ghash memory corruption
From: Herbert Xu @ 2016-10-10  2:16 UTC (permalink / raw)
  To: Marcelo Cerri; +Cc: linux-crypto
In-Reply-To: <20161003150725.GF10133@gallifrey>

On Mon, Oct 03, 2016 at 12:07:25PM -0300, Marcelo Cerri wrote:
> Hi Herbert,
> 
> Sorry for bothering you. I noticed you included two of the patches in
> the crypto-2.6 repository and the remaining one in cryptodev-2.6. Is
> that right? I thought all 3 patches would be included in the cryptodev
> repository.

I wanted the first two to go to stable as well so that's why
I split them up.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCH 6/6] crypto: arm64/aes-neon - fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel, herbert
  Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>

The AES implementation using pure NEON instructions relies on the generic
AES key schedule generation routines, which store the round keys as arrays
of 32-bit quantities stored in memory using native endianness. This means
we should refer to these round keys using 4x4 loads rather than 16x1 loads.
In addition, the ShiftRows tables are loaded using a single scalar load,
which is also affected by endianness, so emit these tables in the correct
order depending on whether we are building for big endian or not.

Fixes: 49788fe2a128 ("arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/aes-neon.S | 25 ++++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index b93170e1cc93..85f07ead7c5c 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -9,6 +9,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 #define AES_ENTRY(func)		ENTRY(neon_ ## func)
 #define AES_ENDPROC(func)	ENDPROC(neon_ ## func)
@@ -83,13 +84,13 @@
 	.endm
 
 	.macro		do_block, enc, in, rounds, rk, rkp, i
-	ld1		{v15.16b}, [\rk]
+	ld1		{v15.4s}, [\rk]
 	add		\rkp, \rk, #16
 	mov		\i, \rounds
 1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
 	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
 	sub_bytes	\in
-	ld1		{v15.16b}, [\rkp], #16
+	ld1		{v15.4s}, [\rkp], #16
 	subs		\i, \i, #1
 	beq		2222f
 	.if		\enc == 1
@@ -229,7 +230,7 @@
 	.endm
 
 	.macro		do_block_2x, enc, in0, in1 rounds, rk, rkp, i
-	ld1		{v15.16b}, [\rk]
+	ld1		{v15.4s}, [\rk]
 	add		\rkp, \rk, #16
 	mov		\i, \rounds
 1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
@@ -237,7 +238,7 @@
 	sub_bytes_2x	\in0, \in1
 	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
 	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
-	ld1		{v15.16b}, [\rkp], #16
+	ld1		{v15.4s}, [\rkp], #16
 	subs		\i, \i, #1
 	beq		2222f
 	.if		\enc == 1
@@ -254,7 +255,7 @@
 	.endm
 
 	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
-	ld1		{v15.16b}, [\rk]
+	ld1		{v15.4s}, [\rk]
 	add		\rkp, \rk, #16
 	mov		\i, \rounds
 1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
@@ -266,7 +267,7 @@
 	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
 	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
 	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
-	ld1		{v15.16b}, [\rkp], #16
+	ld1		{v15.4s}, [\rkp], #16
 	subs		\i, \i, #1
 	beq		2222f
 	.if		\enc == 1
@@ -306,12 +307,16 @@
 	.text
 	.align		4
 .LForward_ShiftRows:
-	.byte		0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
-	.byte		0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+CPU_LE(	.byte		0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3	)
+CPU_LE(	.byte		0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb	)
+CPU_BE(	.byte		0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8	)
+CPU_BE(	.byte		0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0	)
 
 .LReverse_ShiftRows:
-	.byte		0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
-	.byte		0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+CPU_LE(	.byte		0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb	)
+CPU_LE(	.byte		0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3	)
+CPU_BE(	.byte		0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8	)
+CPU_BE(	.byte		0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0	)
 
 .LForward_Sbox:
 	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
-- 
2.7.4

^ permalink raw reply related

* [PATCH 5/6] crypto: arm64/aes-ccm-ce: fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel, herbert
  Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>

The AES-CCM implementation that uses ARMv8 Crypto Extensions instructions
refers to the AES round keys as pairs of 64-bit quantities, which causes
failures when building the code for big endian. In addition, it byte swaps
the input counter unconditionally, while this is only required for little
endian builds. So fix both issues.

Fixes: 12ac3efe74f8 ("arm64/crypto: use crypto instructions to generate AES key schedule")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/aes-ce-ccm-core.S | 53 ++++++++++----------
 1 file changed, 27 insertions(+), 26 deletions(-)

diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index a2a7fbcacc14..3363560c79b7 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -9,6 +9,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
 	.text
 	.arch	armv8-a+crypto
@@ -19,7 +20,7 @@
 	 */
 ENTRY(ce_aes_ccm_auth_data)
 	ldr	w8, [x3]			/* leftover from prev round? */
-	ld1	{v0.2d}, [x0]			/* load mac */
+	ld1	{v0.16b}, [x0]			/* load mac */
 	cbz	w8, 1f
 	sub	w8, w8, #16
 	eor	v1.16b, v1.16b, v1.16b
@@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	beq	8f				/* out of input? */
 	cbnz	w8, 0b
 	eor	v0.16b, v0.16b, v1.16b
-1:	ld1	{v3.2d}, [x4]			/* load first round key */
+1:	ld1	{v3.16b}, [x4]			/* load first round key */
 	prfm	pldl1strm, [x1]
 	cmp	w5, #12				/* which key size? */
 	add	x6, x4, #16
@@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
 	mov	v5.16b, v3.16b
 	b	4f
 2:	mov	v4.16b, v3.16b
-	ld1	{v5.2d}, [x6], #16		/* load 2nd round key */
+	ld1	{v5.16b}, [x6], #16		/* load 2nd round key */
 3:	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
-4:	ld1	{v3.2d}, [x6], #16		/* load next round key */
+4:	ld1	{v3.16b}, [x6], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
-5:	ld1	{v4.2d}, [x6], #16		/* load next round key */
+5:	ld1	{v4.16b}, [x6], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
-	ld1	{v5.2d}, [x6], #16		/* load next round key */
+	ld1	{v5.16b}, [x6], #16		/* load next round key */
 	bpl	3b
 	aese	v0.16b, v4.16b
 	subs	w2, w2, #16			/* last data? */
@@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	ld1	{v1.16b}, [x1], #16		/* load next input block */
 	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
 	bne	1b
-6:	st1	{v0.2d}, [x0]			/* store mac */
+6:	st1	{v0.16b}, [x0]			/* store mac */
 	beq	10f
 	adds	w2, w2, #16
 	beq	10f
@@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data)
 	adds	w7, w7, #1
 	bne	9b
 	eor	v0.16b, v0.16b, v1.16b
-	st1	{v0.2d}, [x0]
+	st1	{v0.16b}, [x0]
 10:	str	w8, [x3]
 	ret
 ENDPROC(ce_aes_ccm_auth_data)
@@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data)
 	 * 			 u32 rounds);
 	 */
 ENTRY(ce_aes_ccm_final)
-	ld1	{v3.2d}, [x2], #16		/* load first round key */
-	ld1	{v0.2d}, [x0]			/* load mac */
+	ld1	{v3.16b}, [x2], #16		/* load first round key */
+	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12				/* which key size? */
 	sub	w3, w3, #2			/* modified # of rounds */
-	ld1	{v1.2d}, [x1]			/* load 1st ctriv */
+	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
 	bmi	0f
 	bne	3f
 	mov	v5.16b, v3.16b
 	b	2f
 0:	mov	v4.16b, v3.16b
-1:	ld1	{v5.2d}, [x2], #16		/* load next round key */
+1:	ld1	{v5.16b}, [x2], #16		/* load next round key */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-2:	ld1	{v3.2d}, [x2], #16		/* load next round key */
+2:	ld1	{v3.16b}, [x2], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v4.2d}, [x2], #16		/* load next round key */
+3:	ld1	{v4.16b}, [x2], #16		/* load next round key */
 	subs	w3, w3, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
@@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final)
 	aese	v1.16b, v4.16b
 	/* final round key cancels out */
 	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
-	st1	{v0.2d}, [x0]			/* store result */
+	st1	{v0.16b}, [x0]			/* store result */
 	ret
 ENDPROC(ce_aes_ccm_final)
 
 	.macro	aes_ccm_do_crypt,enc
 	ldr	x8, [x6, #8]			/* load lower ctr */
-	ld1	{v0.2d}, [x5]			/* load mac */
-	rev	x8, x8				/* keep swabbed ctr in reg */
+	ld1	{v0.16b}, [x5]			/* load mac */
+CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
 0:	/* outer loop */
-	ld1	{v1.1d}, [x6]			/* load upper ctr */
+	ld1	{v1.8b}, [x6]			/* load upper ctr */
 	prfm	pldl1strm, [x1]
 	add	x8, x8, #1
 	rev	x9, x8
 	cmp	w4, #12				/* which key size? */
 	sub	w7, w4, #2			/* get modified # of rounds */
 	ins	v1.d[1], x9			/* no carry in lower ctr */
-	ld1	{v3.2d}, [x3]			/* load first round key */
+	ld1	{v3.16b}, [x3]			/* load first round key */
 	add	x10, x3, #16
 	bmi	1f
 	bne	4f
 	mov	v5.16b, v3.16b
 	b	3f
 1:	mov	v4.16b, v3.16b
-	ld1	{v5.2d}, [x10], #16		/* load 2nd round key */
+	ld1	{v5.16b}, [x10], #16		/* load 2nd round key */
 2:	/* inner loop: 3 rounds, 2x interleaved */
 	aese	v0.16b, v4.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v4.16b
 	aesmc	v1.16b, v1.16b
-3:	ld1	{v3.2d}, [x10], #16		/* load next round key */
+3:	ld1	{v3.16b}, [x10], #16		/* load next round key */
 	aese	v0.16b, v5.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v5.16b
 	aesmc	v1.16b, v1.16b
-4:	ld1	{v4.2d}, [x10], #16		/* load next round key */
+4:	ld1	{v4.16b}, [x10], #16		/* load next round key */
 	subs	w7, w7, #3
 	aese	v0.16b, v3.16b
 	aesmc	v0.16b, v0.16b
 	aese	v1.16b, v3.16b
 	aesmc	v1.16b, v1.16b
-	ld1	{v5.2d}, [x10], #16		/* load next round key */
+	ld1	{v5.16b}, [x10], #16		/* load next round key */
 	bpl	2b
 	aese	v0.16b, v4.16b
 	aese	v1.16b, v4.16b
@@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final)
 	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
 	st1	{v1.16b}, [x0], #16		/* write output block */
 	bne	0b
-	rev	x8, x8
-	st1	{v0.2d}, [x5]			/* store mac */
+CPU_LE(	rev	x8, x8			)
+	st1	{v0.16b}, [x5]			/* store mac */
 	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
 5:	ret
 
 6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
 	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
-	st1	{v0.2d}, [x5]			/* store mac */
+	st1	{v0.16b}, [x5]			/* store mac */
 	add	w2, w2, #16			/* process partial tail block */
 7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
 	umov	w6, v1.b[0]			/* get top crypted ctr byte */
-- 
2.7.4

^ permalink raw reply related

* [PATCH 4/6] crypto: arm64/sha2-ce - fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel, herbert
  Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>

The SHA256 digest is an array of 8 32-bit quantities, so we should refer
to them as such in order for this code to work correctly when built for
big endian. So replace 16 byte scalar loads and stores with 4x32 vector
ones where appropriate.

Fixes: 6ba6c74dfc6b ("arm64/crypto: SHA-224/SHA-256 using ARMv8 Crypto Extensions")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/sha2-ce-core.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 5df9d9d470ad..01cfee066837 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -85,7 +85,7 @@ ENTRY(sha2_ce_transform)
 	ld1		{v12.4s-v15.4s}, [x8]
 
 	/* load state */
-	ldp		dga, dgb, [x0]
+	ld1		{dgav.4s, dgbv.4s}, [x0]
 
 	/* load sha256_ce_state::finalize */
 	ldr		w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
@@ -148,6 +148,6 @@ CPU_LE(	rev32		v19.16b, v19.16b	)
 	b		1b
 
 	/* store new state */
-3:	stp		dga, dgb, [x0]
+3:	st1		{dgav.4s, dgbv.4s}, [x0]
 	ret
 ENDPROC(sha2_ce_transform)
-- 
2.7.4

^ permalink raw reply related

* [PATCH 3/6] crypto: arm64/sha1-ce - fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel, herbert
  Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>

The SHA1 digest is an array of 5 32-bit quantities, so we should refer
to them as such in order for this code to work correctly when built for
big endian. So replace 16 byte scalar loads and stores with 4x4 vector
ones where appropriate.

Fixes: 2c98833a42cd ("arm64/crypto: SHA-1 using ARMv8 Crypto Extensions")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/sha1-ce-core.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 033aae6d732a..c98e7e849f06 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -78,7 +78,7 @@ ENTRY(sha1_ce_transform)
 	ld1r		{k3.4s}, [x6]
 
 	/* load state */
-	ldr		dga, [x0]
+	ld1		{dgav.4s}, [x0]
 	ldr		dgb, [x0, #16]
 
 	/* load sha1_ce_state::finalize */
@@ -144,7 +144,7 @@ CPU_LE(	rev32		v11.16b, v11.16b	)
 	b		1b
 
 	/* store new state */
-3:	str		dga, [x0]
+3:	st1		{dgav.4s}, [x0]
 	str		dgb, [x0, #16]
 	ret
 ENDPROC(sha1_ce_transform)
-- 
2.7.4

^ permalink raw reply related

* [PATCH 2/6] crypto: arm64/ghash-ce - fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel, herbert
  Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>

The GHASH key and digest are both pairs of 64-bit quantities, but the
GHASH code does not always refer to them as such, causing failures when
built for big endian. So replace the 16x1 loads and stores with 2x8 ones.

Fixes: b913a6404ce2 ("arm64/crypto: improve performance of GHASH algorithm")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/ghash-ce-core.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index dc457015884e..f0bb9f0b524f 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -29,8 +29,8 @@
 	 *			   struct ghash_key const *k, const char *head)
 	 */
 ENTRY(pmull_ghash_update)
-	ld1		{SHASH.16b}, [x3]
-	ld1		{XL.16b}, [x1]
+	ld1		{SHASH.2d}, [x3]
+	ld1		{XL.2d}, [x1]
 	movi		MASK.16b, #0xe1
 	ext		SHASH2.16b, SHASH.16b, SHASH.16b, #8
 	shl		MASK.2d, MASK.2d, #57
@@ -74,6 +74,6 @@ CPU_LE(	rev64		T1.16b, T1.16b	)
 
 	cbnz		w0, 0b
 
-	st1		{XL.16b}, [x1]
+	st1		{XL.2d}, [x1]
 	ret
 ENDPROC(pmull_ghash_update)
-- 
2.7.4

^ permalink raw reply related

* [PATCH 1/6] crypto: arm64/aes-ce - fix for big endian
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel, herbert
  Cc: catalin.marinas, will.deacon, Ard Biesheuvel
In-Reply-To: <1476034945-9186-1-git-send-email-ard.biesheuvel@linaro.org>

The core AES cipher implementation that uses ARMv8 Crypto Extensions
instructions erroneously loads the round keys as 64-bit quantities,
which causes the algorithm to fail when built for big endian. In
addition, the key schedule generation routine fails to take endianness
into account as well, when combining the input key with
the round constants. So fix both issues.

Fixes: 12ac3efe74f8 ("arm64/crypto: use crypto instructions to generate AES key schedule")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/aes-ce-cipher.c | 25 ++++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c
index f7bd9bf0bbb3..50d9fe11d0c8 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-cipher.c
@@ -47,24 +47,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	kernel_neon_begin_partial(4);
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.2d}, [%[key]], #16		;"
+		"	ld1	{v1.16b}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	ld1	{v3.16b}, [%[key]], #16		;"
 		"1:	aese	v0.16b, v2.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.2d}, [%[key]], #16		;"
+		"2:	ld1	{v1.16b}, [%[key]], #16		;"
 		"	aese	v0.16b, v3.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.2d}, [%[key]], #16		;"
+		"3:	ld1	{v2.16b}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aese	v0.16b, v1.16b			;"
 		"	aesmc	v0.16b, v0.16b			;"
-		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	ld1	{v3.16b}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aese	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -92,24 +92,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	kernel_neon_begin_partial(4);
 
 	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.2d}, [%[key]], #16		;"
+		"	ld1	{v1.16b}, [%[key]], #16		;"
 		"	cmp	%w[rounds], #10			;"
 		"	bmi	0f				;"
 		"	bne	3f				;"
 		"	mov	v3.16b, v1.16b			;"
 		"	b	2f				;"
 		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	ld1	{v3.16b}, [%[key]], #16		;"
 		"1:	aesd	v0.16b, v2.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.2d}, [%[key]], #16		;"
+		"2:	ld1	{v1.16b}, [%[key]], #16		;"
 		"	aesd	v0.16b, v3.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.2d}, [%[key]], #16		;"
+		"3:	ld1	{v2.16b}, [%[key]], #16		;"
 		"	subs	%w[rounds], %w[rounds], #3	;"
 		"	aesd	v0.16b, v1.16b			;"
 		"	aesimc	v0.16b, v0.16b			;"
-		"	ld1	{v3.2d}, [%[key]], #16		;"
+		"	ld1	{v3.16b}, [%[key]], #16		;"
 		"	bpl	1b				;"
 		"	aesd	v0.16b, v2.16b			;"
 		"	eor	v0.16b, v0.16b, v3.16b		;"
@@ -173,7 +173,12 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
+#ifndef CONFIG_CPU_BIG_ENDIAN
 		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+#else
+		rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
+			 rki[0];
+#endif
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
-- 
2.7.4

^ permalink raw reply related

* [PATCH 0/6] crypto: arm64 - big endian fixes
From: Ard Biesheuvel @ 2016-10-09 17:42 UTC (permalink / raw)
  To: linux-crypto, linux-arm-kernel, herbert
  Cc: catalin.marinas, will.deacon, Ard Biesheuvel

As it turns out, none of the accelerated crypto routines under arch/arm64/crypto
currently work, or have ever worked correctly when built for big endian. So this
series fixes all of them.

Each of these patches carries a fixes tag, and could be backported to stable.
However, for patches #1 and #5, the fixes tag denotes the oldest commit that the
fix is compatible with, not the patch that introduced the algorithm. This is due
to the fact that the key schedules are incompatible between generic AES and the
arm64 Crypto Extensions implementation (but only when building for big endian).
This is not a problem in practice, but it does mean that the AES-CCM and AES in
ECB/CBC/CTR/XTS mode implementations before v3.19 require a different fix, i.e.,
one that is compatible with the generic AES key schedule generation code (which
it currently no longer uses).

In any case, please apply with cc to stable.

Ard Biesheuvel (6):
  crypto: arm64/aes-ce - fix for big endian
  crypto: arm64/ghash-ce - fix for big endian
  crypto: arm64/sha1-ce - fix for big endian
  crypto: arm64/sha2-ce - fix for big endian
  crypto: arm64/aes-ccm-ce: fix for big endian
  crypto: arm64/aes-neon - fix for big endian

 arch/arm64/crypto/aes-ce-ccm-core.S | 53 ++++++++++----------
 arch/arm64/crypto/aes-ce-cipher.c   | 25 +++++----
 arch/arm64/crypto/aes-neon.S        | 25 +++++----
 arch/arm64/crypto/ghash-ce-core.S   |  6 +--
 arch/arm64/crypto/sha1-ce-core.S    |  4 +-
 arch/arm64/crypto/sha2-ce-core.S    |  4 +-
 6 files changed, 64 insertions(+), 53 deletions(-)

-- 
2.7.4

^ permalink raw reply

* chacha counters in random.c
From: Sami Farin @ 2016-10-09 17:16 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: linux-crypto

commit e192be9d9a30555aae2ca1dc3aad37cba484cd4a

+       chacha20_block(&crng->state[0], out);
+       if (crng->state[12] == 0)
+               crng->state[13]++;

Did you mean 
+       if (++crng->state[12] == 0)

?

-- 
Do what you love because life is too short for anything else.
https://samifar.in/

^ permalink raw reply

* Re: Observed a ecryptFS crash
From: xiakaixu @ 2016-10-09  7:12 UTC (permalink / raw)
  To: tyhicks@canonical.com
  Cc: liushuoran, linux-crypto@vger.kernel.org,
	ecryptfs@vger.kernel.org, linux-kernel@vger.kernel.org,
	Yaodongdong, Wangbintian, yingjindong, Yezongbo, likan (A)
In-Reply-To: <00B10D30F2BAA743B48953A4D86C96D54CB653@SZXEMI506-MBS.china.huawei.com>

ping...


> Hi Tyhicks,
>
> We observed an ecryptFS crash occasionally in Linux kernel 4.1.18. The call trace is attached below. Is it a known issue? Look forward to hearing from you. Thanks in advance!
>
> [19314.529479s][pid:2694,cpu3,GAC_Executor[0]]Call trace:
> [19314.529510s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0000f3898>] do_raw_spin_lock+0x20/0x200
> [19314.529510s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc001031fb0>] _raw_spin_lock+0x28/0x34
> [19314.529541s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0003908e0>] selinux_inode_free_security+0x3c/0x94
> [19314.529541s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000386b04>] security_inode_free+0x2c/0x38
> [19314.529541s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0001fff88>] __destroy_inode+0x2c/0x180
> [19314.529571s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000201660>] destroy_inode+0x30/0xa0
> [19314.529571s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0002017d8>] evict+0x108/0x1c0
> [19314.529571s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000202588>] iput+0x184/0x258
> [19314.529602s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0002f27d0>] ecryptfs_evict_inode+0x30/0x3c
> [19314.529602s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc00020177c>] evict+0xac/0x1c0
> [19314.529602s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0002018d4>] dispose_list+0x44/0x5c
> [19314.529632s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000202d28>] evict_inodes+0xcc/0x12c
> [19314.529632s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0001e5d04>] generic_shutdown_super+0x58/0xe4
> [19314.529632s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0001e7164>] kill_anon_super+0x30/0x74
> [19314.529663s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0002f16a4>] ecryptfs_kill_block_super+0x24/0x54
> [19314.529663s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0001e6690>] deactivate_locked_super+0x60/0x8c
> [19314.529663s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0001e6754>] deactivate_super+0x98/0xa4
> [19314.529693s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000206d54>] cleanup_mnt+0x50/0xd0
> [19314.529693s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000206e48>] __cleanup_mnt+0x20/0x2c
> [19314.529693s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0000c1bac>] task_work_run+0xbc/0xf8
> [19314.529724s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0000a3e98>] do_exit+0x2d4/0xa14
> [19314.529724s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0000a56a8>] do_group_exit+0x60/0xf8
> [19314.529724s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc0000b26ac>] get_signal+0x284/0x598
> [19314.529754s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc00008950c>] do_signal+0x170/0x5b8
> [19314.529754s][pid:2694,cpu3,GAC_Executor[0]][<ffffffc000089be0>] do_notify_resume+0x70/0x78
> [19314.529785s][pid:2694,cpu3,GAC_Executor[0]]Code: aa0003f3 aa1e03e0 97fe7718 5289d5a0 (b9400661)
> [19314.529907s][pid:2694,cpu3,GAC_Executor[0]]---[ end trace 382e4b6264b035b5 ]---
> [19314.529907s][pid:2694,cpu3,GAC_Executor[0]]Kernel panic - not syncing: Fatal exception
>
> Regards,
> Shuoran
>

^ permalink raw reply

* Re: PEACE BE WITH YOU,
From: contact @ 2016-10-06  4:37 UTC (permalink / raw)
  To: Recipients

Subject: Can I Trust You In This5 Days Project ?

Email (     dr.kpyj1958@gmail.com   )

Dear Friend

My name is  ATTORNEY KONO PETER , I really do not mean to waste your time. Considering the fact that this is a £36,000,000.00 British Pounds.deal shear rate 50/50 % and it's bank to bank wire trasaction within Five workind days.
 
I carefully contact you due to many Internet frauds nowadays.but i put my faith in God because all things in life is by risk but don't let me down now or after, This is Mr.Plaviashakunthala Lobo and his family was involved in plan crash 22nd of May 2010 in List of passengers on Air India Express flight that crash  32.Plaviashakunthala Lobo 33. Venishanikola Lobo
34. Vishalfloid Lobo (child) and all family died without any inheritance or next of kin so i want you to work together with me been an attorney so this is Mr.Plaviashakunthala Lobo account details.
 
Bank name:       Bank of Africa
Bank Address:    Cotonou Benin Republic
Account name:    Plavia shakunthala Lobo 
Account Number:  1103-8022-1351
Account Balance: £36,000,000.00 British Pounds (GBP)
Date of deposit: 19th December, 2009
Account officer : Bashirudeen  Hussam

I was with Mr.Plaviashakunthala Lobo as a legal witness when this money was deposited as fixed deposit in 2009. Since his demise, I have visited this bank three times. Contact the bank and ask for the confirmation of his involvement in the plane crash.check the website:  http://www.thehindu.com/news/national/list-of-passengers-on-air-india-express-flight/article435569.ece
 
Thanks
ATTORNEY  KONO PETER  
------------------------------------------
Disclaimer: This message transmitted with it are confidential and privileged. If you have received it in error, please notify the sender by return e-mail and delete this message from your system. If you are not the intended recipient you are hereby notified that any dissemination, copy or disclosure of this e-mail is strictly prohibited.

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus

^ permalink raw reply

* Re: [PATCH] crypto: caam: add support for iMX6UL
From: Rob Herring @ 2016-10-09  1:29 UTC (permalink / raw)
  To: Marcus Folkesson
  Cc: herbert, davem, mark.rutland, horia.geanta, tudor-dan.ambarus,
	alexandru.porosanu, arnd, linux-crypto, devicetree, linux-kernel
In-Reply-To: <20161004133259.GA30071@gmail.com>

On Tue, Oct 04, 2016 at 09:32:59AM -0400, Marcus Folkesson wrote:
> i.MX6UL only requires three clocks to enable the CAAM module.
> 
> Signed-off-by: Marcus Folkesson <marcus.folkesson@gmail.com>
> ---
>  .../devicetree/bindings/crypto/fsl-sec4.txt        | 20 +++++++++++++

Acked-by: Rob Herring <robh@kernel.org>

>  drivers/crypto/caam/ctrl.c                         | 35 ++++++++++++----------
>  2 files changed, 40 insertions(+), 15 deletions(-)

^ permalink raw reply

* [PATCH] crypto: api - Remove no-op exit_ops code
From: Eric Biggers @ 2016-10-07 21:13 UTC (permalink / raw)
  To: herbert, davem; +Cc: linux-crypto, Eric Biggers

crypto_exit_cipher_ops() and crypto_exit_compress_ops() are no-ops and
have been for a long time, so remove them.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 crypto/api.c      | 20 ++------------------
 crypto/cipher.c   |  4 ----
 crypto/compress.c |  4 ----
 crypto/internal.h |  3 ---
 4 files changed, 2 insertions(+), 29 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index bbc147c..a88729f 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -310,24 +310,8 @@ static void crypto_exit_ops(struct crypto_tfm *tfm)
 {
 	const struct crypto_type *type = tfm->__crt_alg->cra_type;
 
-	if (type) {
-		if (tfm->exit)
-			tfm->exit(tfm);
-		return;
-	}
-
-	switch (crypto_tfm_alg_type(tfm)) {
-	case CRYPTO_ALG_TYPE_CIPHER:
-		crypto_exit_cipher_ops(tfm);
-		break;
-
-	case CRYPTO_ALG_TYPE_COMPRESS:
-		crypto_exit_compress_ops(tfm);
-		break;
-
-	default:
-		BUG();
-	}
+	if (type && tfm->exit)
+		tfm->exit(tfm);
 }
 
 static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask)
diff --git a/crypto/cipher.c b/crypto/cipher.c
index 39541e0..94fa355 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -116,7 +116,3 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm)
 
 	return 0;
 }
-
-void crypto_exit_cipher_ops(struct crypto_tfm *tfm)
-{
-}
diff --git a/crypto/compress.c b/crypto/compress.c
index c33f076..f2d5229 100644
--- a/crypto/compress.c
+++ b/crypto/compress.c
@@ -42,7 +42,3 @@ int crypto_init_compress_ops(struct crypto_tfm *tfm)
 
 	return 0;
 }
-
-void crypto_exit_compress_ops(struct crypto_tfm *tfm)
-{
-}
diff --git a/crypto/internal.h b/crypto/internal.h
index 7eefcdb..f073204 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -76,9 +76,6 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask);
 int crypto_init_cipher_ops(struct crypto_tfm *tfm);
 int crypto_init_compress_ops(struct crypto_tfm *tfm);
 
-void crypto_exit_cipher_ops(struct crypto_tfm *tfm);
-void crypto_exit_compress_ops(struct crypto_tfm *tfm);
-
 struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask);
 void crypto_larval_kill(struct crypto_alg *alg);
 struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask);
-- 
2.8.0.rc3.226.g39d4020

^ permalink raw reply related

* [PATCH] crypto: skcipher - Remove unused crypto_lookup_skcipher() declaration
From: Eric Biggers @ 2016-10-07 21:13 UTC (permalink / raw)
  To: herbert, davem; +Cc: linux-crypto, Eric Biggers

The definition of crypto_lookup_skcipher() was already removed in
commit 3a01d0ee2b99 ("crypto: skcipher - Remove top-level givcipher
interface").  So the declaration should be removed too.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 include/crypto/internal/skcipher.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index a21a95e..95d2a18 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -74,8 +74,6 @@ static inline int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn,
 	return crypto_grab_skcipher(spawn, name, type, mask);
 }
 
-struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask);
-
 static inline void crypto_drop_skcipher(struct crypto_skcipher_spawn *spawn)
 {
 	crypto_drop_spawn(&spawn->base);
-- 
2.8.0.rc3.226.g39d4020

^ permalink raw reply related

* [PATCH] crypto: crypto4xx - Fix size used in dma_free_coherent()
From: Christophe JAILLET @ 2016-10-07 20:36 UTC (permalink / raw)
  To: herbert, davem
  Cc: linux-crypto, linux-kernel, kernel-janitors, Christophe JAILLET

The size used in 'dma_free_coherent()' looks uninitialized here.
ctx->sa_len is set a few lines below and is apparently not set by the
caller.
So use 'size' as in the corresponding 'dma_alloc_coherent()' a few lines
above.

This has been spotted with coccinelle, using the following script:
////////////////////
@r@
expression x0, x1, y0, y1, z0, z1, t0, t1, ret;
@@

*   ret = dma_alloc_coherent(x0, y0, z0, t0);
    ...
*   dma_free_coherent(x1, y1, ret, t1);


@script:python@
y0 << r.y0;
y1 << r.y1;

@@
if y1.find(y0) == -1:
 print "WARNING: sizes look different:  '%s'   vs   '%s'" % (y0, y1)
////////////////////

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
---
 drivers/crypto/amcc/crypto4xx_core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index dae1e39139e9..d10b4ae5e0da 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -135,8 +135,7 @@ int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size)
 	ctx->sa_out = dma_alloc_coherent(ctx->dev->core_dev->device, size * 4,
 					 &ctx->sa_out_dma_addr, GFP_ATOMIC);
 	if (ctx->sa_out == NULL) {
-		dma_free_coherent(ctx->dev->core_dev->device,
-				  ctx->sa_len * 4,
+		dma_free_coherent(ctx->dev->core_dev->device, size * 4,
 				  ctx->sa_in, ctx->sa_in_dma_addr);
 		return -ENOMEM;
 	}
-- 
2.7.4


^ permalink raw reply related

* Re: [PATCH] padata: add helper function for queue length
From: Steffen Klassert @ 2016-10-07  3:15 UTC (permalink / raw)
  To: Jason A. Donenfeld; +Cc: linux-crypto, linux-kernel
In-Reply-To: <20161002014638.8049-1-Jason@zx2c4.com>

On Sun, Oct 02, 2016 at 03:46:38AM +0200, Jason A. Donenfeld wrote:
> Since padata has a maximum number of inflight jobs, currently 1000, it's
> very useful to know how many jobs are currently queued up. This adds a
> simple helper function to expose this information.
> 
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
>  include/linux/padata.h |  2 ++
>  kernel/padata.c        | 16 ++++++++++++++++
>  2 files changed, 18 insertions(+)
> 
> diff --git a/include/linux/padata.h b/include/linux/padata.h
> index 113ee62..4840ae4 100644
> --- a/include/linux/padata.h
> +++ b/include/linux/padata.h
> @@ -3,6 +3,7 @@
>   *
>   * Copyright (C) 2008, 2009 secunet Security Networks AG
>   * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
> + * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>
>   *
>   * This program is free software; you can redistribute it and/or modify it
>   * under the terms and conditions of the GNU General Public License,
> @@ -181,4 +182,5 @@ extern int padata_register_cpumask_notifier(struct padata_instance *pinst,
>  					    struct notifier_block *nblock);
>  extern int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
>  					      struct notifier_block *nblock);
> +extern int padata_queue_len(struct padata_instance *pinst);
>  #endif
> diff --git a/kernel/padata.c b/kernel/padata.c
> index 9932788..17c1e08 100644
> --- a/kernel/padata.c
> +++ b/kernel/padata.c
> @@ -5,6 +5,7 @@
>   *
>   * Copyright (C) 2008, 2009 secunet Security Networks AG
>   * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
> + * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>
>   *
>   * This program is free software; you can redistribute it and/or modify it
>   * under the terms and conditions of the GNU General Public License,
> @@ -1039,3 +1040,18 @@ void padata_free(struct padata_instance *pinst)
>  	kobject_put(&pinst->kobj);
>  }
>  EXPORT_SYMBOL(padata_free);
> +
> +/**
> + * padata_queue_len - retreive the number of in progress jobs
> + *
> + * @padata_inst: padata instance from which to read the queue size
> + */
> +int padata_queue_len(struct padata_instance *pinst)
> +{
> +	int len;
> +	rcu_read_lock_bh();
> +	len = atomic_read(&rcu_dereference_bh(pinst->pd)->refcnt);
> +	rcu_read_unlock_bh();
> +	return len;
> +}
> +EXPORT_SYMBOL(padata_queue_len);

Why do you want to have this? Without a user of this function,
there is no point in adding it.

^ permalink raw reply

* Re: [PATCH] crypto: caam: add support for iMX6UL
From: Horia Geanta Neag @ 2016-10-06  7:12 UTC (permalink / raw)
  To: Marcus Folkesson, herbert@gondor.apana.org.au,
	davem@davemloft.net, robh+dt@kernel.org, mark.rutland@arm.com,
	Tudor-Dan Ambarus, Alexandru Porosanu, arnd@arndb.de
  Cc: linux-crypto@vger.kernel.org, devicetree@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <20161004133259.GA30071@gmail.com>

On 10/4/2016 10:33 AM, Marcus Folkesson wrote:
> i.MX6UL does only require three clocks to enable CAAM module.
> 
> Signed-off-by: Marcus Folkesson <marcus.folkesson@gmail.com>
Reviewed-by: Horia Geantă <horia.geanta@nxp.com>

Thanks,
Horia


^ permalink raw reply

* Re: [PATCH v3 0/8] Add support for SafeXcel IP-76 to OMAP RNG
From: Matthijs van Duin @ 2016-10-05 20:37 UTC (permalink / raw)
  To: Romain Perier
  Cc: dsaxena, mpm, Herbert Xu, Gregory Clement, Thomas Petazzoni,
	Nadav Haklai, Omri Itach, Shadi Ammouri, Yahuda Yitschak,
	Hanna Hawa, Neta Zur Hershkovits, Igal Liberman, Marcin Wojtas,
	linux-crypto, linux-omap@vger.kernel.org
In-Reply-To: <57E8F1F1.6040906@free-electrons.com>

>>> The driver omap-rng has a lot of similarity with the IP block SafeXcel
>>> IP-76. A lot of registers are the same and the way that the driver works
>>> is very closed the description of the TRNG EIP76 in its datasheet.

Specifically the omap rng, at least the version I've examined, is a
SafeXcel EIP-75a core with a TI wrapper according to my notes. I don't
remember anymore where I obtained the "a", but the peripheral does
identify itself as EIP-75 v2.0.0 in the register at offset 0x7c.

Matthijs

^ permalink raw reply

* [PATCH v4 0/2] Improve DMA chaining for ahash requests
From: Romain Perier @ 2016-10-05  7:56 UTC (permalink / raw)
  To: Boris Brezillon, Arnaud Ebalard
  Cc: David S. Miller, Herbert Xu, Thomas Petazzoni, Jason Cooper,
	Andrew Lunn, Sebastian Hesselbarth, Gregory Clement, Nadav Haklai,
	Ofer Heifetz, linux-crypto, linux-arm-kernel

This series contains performance improvements for ahash requests.
So far, ahash requests were systematically not chained at the DMA level.
However, in some cases, such as when using IPSec, some ahash
requests can be processed directly by the engine and don't have
intermediate partial update states.

This series first reworks the way outer IVs are copied from the SRAM
into the dma pool. To do so, we introduce a common dma pool for all types
of requests that contains outer results (like IV or digest). Then, for
ahash requests that can be processed directly by the engine, outer
results are copied from the SRAM into the common dma pool. These requests
are then allowed to be chained at the DMA level.


Benchmarking results with iperf through IPSec
=============================================
		ESP			AH

Before		343 Mbits/s		492 Mbits/s
After		422 Mbits/s		577 Mbits/s
Improvement	+23%			+17%

Romain Perier (2):
  crypto: marvell - Use an unique pool to copy results of requests
  crypto: marvell - Don't break chain for computable last ahash requests

 drivers/crypto/marvell/cesa.c   |  4 ---
 drivers/crypto/marvell/cesa.h   |  5 ++--
 drivers/crypto/marvell/cipher.c |  8 +++--
 drivers/crypto/marvell/hash.c   | 65 +++++++++++++++++++++++++++++++----------
 drivers/crypto/marvell/tdma.c   | 28 +++++++++---------
 5 files changed, 70 insertions(+), 40 deletions(-)

-- 
2.9.3

^ permalink raw reply

* [PATCH v4 2/2] crypto: marvell - Don't break chain for computable last ahash requests
From: Romain Perier @ 2016-10-05  7:56 UTC (permalink / raw)
  To: Boris Brezillon, Arnaud Ebalard
  Cc: David S. Miller, Herbert Xu, Thomas Petazzoni, Jason Cooper,
	Andrew Lunn, Sebastian Hesselbarth, Gregory Clement, Nadav Haklai,
	Ofer Heifetz, linux-crypto, linux-arm-kernel
In-Reply-To: <20161005075633.12711-1-romain.perier@free-electrons.com>

Currently, the driver breaks the chain for all kinds of hash requests in order
not to override intermediate states of partial ahash updates. However, some final
ahash requests can be directly processed by the engine, and so without
intermediate state. This is typically the case for most of the HMAC requests
processed via IPSec.

This commit adds a TDMA descriptor to copy the context for these kinds of requests
into the "op" dma pool, which then allows these requests to be chained at the DMA level.
The 'complete' operation is also updated to retrieve the MAC digest from the
right location.

Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
---

Changes in v4:
 - Remove the dummy descriptor at the end of the chain, when a TDMA_RESULT
   is present. So we rewrote the code of ahash_complete a bit accordingly.

Changes in v3:
 - Copy the whole context back to RAM and not just the digest. Also
   fixed a rebase issue ^^ (whoops)

Changes in v2:
 - Replaced BUG_ON by an error
 - Add a variable "break_chain", with "type" to break the chain

 drivers/crypto/marvell/hash.c | 65 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 16 deletions(-)

diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index 9f28468..2a92605 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -312,24 +312,40 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
 	int i;
 
 	digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
-	for (i = 0; i < digsize / 4; i++)
-		creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i));
 
-	if (creq->last_req) {
+	if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ &&
+	    (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT) {
+		__le32 *data = NULL;
+
 		/*
-		 * Hardware's MD5 digest is in little endian format, but
-		 * SHA in big endian format
+		 * Result is already in the correct endianess when the SA is
+		 * used
 		 */
-		if (creq->algo_le) {
-			__le32 *result = (void *)ahashreq->result;
+		data = creq->base.chain.last->op->ctx.hash.hash;
+		for (i = 0; i < digsize / 4; i++)
+			creq->state[i] = cpu_to_le32(data[i]);
 
-			for (i = 0; i < digsize / 4; i++)
-				result[i] = cpu_to_le32(creq->state[i]);
-		} else {
-			__be32 *result = (void *)ahashreq->result;
+		memcpy(ahashreq->result, data, digsize);
+	} else {
+		for (i = 0; i < digsize / 4; i++)
+			creq->state[i] = readl_relaxed(engine->regs +
+						       CESA_IVDIG(i));
+		if (creq->last_req) {
+			/*
+			* Hardware's MD5 digest is in little endian format, but
+			* SHA in big endian format
+			*/
+			if (creq->algo_le) {
+				__le32 *result = (void *)ahashreq->result;
+
+				for (i = 0; i < digsize / 4; i++)
+					result[i] = cpu_to_le32(creq->state[i]);
+			} else {
+				__be32 *result = (void *)ahashreq->result;
 
-			for (i = 0; i < digsize / 4; i++)
-				result[i] = cpu_to_be32(creq->state[i]);
+				for (i = 0; i < digsize / 4; i++)
+					result[i] = cpu_to_be32(creq->state[i]);
+			}
 		}
 	}
 
@@ -504,6 +520,12 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
 						CESA_SA_DESC_CFG_LAST_FRAG,
 				      CESA_SA_DESC_CFG_FRAG_MSK);
 
+		ret = mv_cesa_dma_add_result_op(chain,
+						CESA_SA_CFG_SRAM_OFFSET,
+						CESA_SA_DATA_SRAM_OFFSET,
+						CESA_TDMA_SRC_IN_SRAM, flags);
+		if (ret)
+			return ERR_PTR(-ENOMEM);
 		return op;
 	}
 
@@ -564,6 +586,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 	struct mv_cesa_op_ctx *op = NULL;
 	unsigned int frag_len;
 	int ret;
+	u32 type;
 
 	basereq->chain.first = NULL;
 	basereq->chain.last = NULL;
@@ -635,7 +658,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 		goto err_free_tdma;
 	}
 
-	if (op) {
+	/*
+	 * If results are copied via DMA, this means that this
+	 * request can be directly processed by the engine,
+	 * without partial updates. So we can chain it at the
+	 * DMA level with other requests.
+	 */
+	type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK;
+
+	if (op && type != CESA_TDMA_RESULT) {
 		/* Add dummy desc to wait for crypto operation end */
 		ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags);
 		if (ret)
@@ -648,8 +679,10 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
 	else
 		creq->cache_ptr = 0;
 
-	basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ |
-				       CESA_TDMA_BREAK_CHAIN);
+	basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ;
+
+	if (type != CESA_TDMA_RESULT)
+		basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;
 
 	return 0;
 
-- 
2.9.3

^ permalink raw reply related

* [PATCH v4 1/2] crypto: marvell - Use an unique pool to copy results of requests
From: Romain Perier @ 2016-10-05  7:56 UTC (permalink / raw)
  To: Boris Brezillon, Arnaud Ebalard
  Cc: David S. Miller, Herbert Xu, Thomas Petazzoni, Jason Cooper,
	Andrew Lunn, Sebastian Hesselbarth, Gregory Clement, Nadav Haklai,
	Ofer Heifetz, linux-crypto, linux-arm-kernel
In-Reply-To: <20161005075633.12711-1-romain.perier@free-electrons.com>

So far, we used a dedicated dma pool to copy the result of the outer IV for
cipher requests. Instead of using a dma pool per outer data, we prefer
to use the op dma pool that contains all parts of the request from the SRAM.
Then, the outer data that is likely to be used by the 'complete'
operation is copied later. In this way, any type of result can be
retrieved by DMA for cipher or ahash requests.

Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---

Changes in v4:
  - Added a comment that explains why we retrieve the first op ctx
    of the chain.
  - Added the tag 'Acked-by'

Changes in v3:
  - Don't allocate a new op ctx for the last tdma descriptor. Instead
    we point to the last op ctx in the tdma chain, and copy the context
    of the current request to this location.

Changes in v2:
  - Use the dma pool "op" to retrieve outer data instead of introducing
    a new one.

 drivers/crypto/marvell/cesa.c   |  4 ----
 drivers/crypto/marvell/cesa.h   |  5 ++---
 drivers/crypto/marvell/cipher.c |  8 +++++---
 drivers/crypto/marvell/tdma.c   | 33 +++++++++++++++++++--------------
 4 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c
index 37dadb2..6e7a5c7 100644
--- a/drivers/crypto/marvell/cesa.c
+++ b/drivers/crypto/marvell/cesa.c
@@ -375,10 +375,6 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa)
 	if (!dma->padding_pool)
 		return -ENOMEM;
 
-	dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0);
-	if (!dma->iv_pool)
-		return -ENOMEM;
-
 	cesa->dma = dma;
 
 	return 0;
diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h
index e423d33..a768da7 100644
--- a/drivers/crypto/marvell/cesa.h
+++ b/drivers/crypto/marvell/cesa.h
@@ -277,7 +277,7 @@ struct mv_cesa_op_ctx {
 #define CESA_TDMA_DUMMY				0
 #define CESA_TDMA_DATA				1
 #define CESA_TDMA_OP				2
-#define CESA_TDMA_IV				3
+#define CESA_TDMA_RESULT			3
 
 /**
  * struct mv_cesa_tdma_desc - TDMA descriptor
@@ -393,7 +393,6 @@ struct mv_cesa_dev_dma {
 	struct dma_pool *op_pool;
 	struct dma_pool *cache_pool;
 	struct dma_pool *padding_pool;
-	struct dma_pool *iv_pool;
 };
 
 /**
@@ -839,7 +838,7 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain)
 	memset(chain, 0, sizeof(*chain));
 }
 
-int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
+int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
 			  u32 size, u32 flags, gfp_t gfp_flags);
 
 struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain,
diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c
index d19dc96..098871a 100644
--- a/drivers/crypto/marvell/cipher.c
+++ b/drivers/crypto/marvell/cipher.c
@@ -212,7 +212,8 @@ mv_cesa_ablkcipher_complete(struct crypto_async_request *req)
 		struct mv_cesa_req *basereq;
 
 		basereq = &creq->base;
-		memcpy(ablkreq->info, basereq->chain.last->data, ivsize);
+		memcpy(ablkreq->info, basereq->chain.last->op->ctx.blkcipher.iv,
+		       ivsize);
 	} else {
 		memcpy_fromio(ablkreq->info,
 			      engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET,
@@ -373,8 +374,9 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req,
 
 	/* Add output data for IV */
 	ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req));
-	ret = mv_cesa_dma_add_iv_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET,
-				    ivsize, CESA_TDMA_SRC_IN_SRAM, flags);
+	ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CFG_SRAM_OFFSET,
+				    CESA_SA_DATA_SRAM_OFFSET,
+				    CESA_TDMA_SRC_IN_SRAM, flags);
 
 	if (ret)
 		goto err_free_tdma;
diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c
index 9fd7a5f..4416b88 100644
--- a/drivers/crypto/marvell/tdma.c
+++ b/drivers/crypto/marvell/tdma.c
@@ -69,9 +69,6 @@ void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq)
 		if (type == CESA_TDMA_OP)
 			dma_pool_free(cesa_dev->dma->op_pool, tdma->op,
 				      le32_to_cpu(tdma->src));
-		else if (type == CESA_TDMA_IV)
-			dma_pool_free(cesa_dev->dma->iv_pool, tdma->data,
-				      le32_to_cpu(tdma->dst));
 
 		tdma = tdma->next;
 		dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma,
@@ -209,29 +206,37 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags)
 	return new_tdma;
 }
 
-int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
+int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src,
 			  u32 size, u32 flags, gfp_t gfp_flags)
 {
-
-	struct mv_cesa_tdma_desc *tdma;
-	u8 *iv;
-	dma_addr_t dma_handle;
+	struct mv_cesa_tdma_desc *tdma, *op_desc;
 
 	tdma = mv_cesa_dma_add_desc(chain, gfp_flags);
 	if (IS_ERR(tdma))
 		return PTR_ERR(tdma);
 
-	iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle);
-	if (!iv)
-		return -ENOMEM;
+	/* We re-use an existing op_desc object to retrieve the context
+	 * and result instead of allocating a new one.
+	 * There is at least one object of this type in a CESA crypto
+	 * req, just pick the first one in the chain.
+	 */
+	for (op_desc = chain->first; op_desc; op_desc = op_desc->next) {
+		u32 type = op_desc->flags & CESA_TDMA_TYPE_MSK;
+
+		if (type == CESA_TDMA_OP)
+			break;
+	}
+
+	if (!op_desc)
+		return -EIO;
 
 	tdma->byte_cnt = cpu_to_le32(size | BIT(31));
 	tdma->src = src;
-	tdma->dst = cpu_to_le32(dma_handle);
-	tdma->data = iv;
+	tdma->dst = op_desc->src;
+	tdma->op = op_desc->op;
 
 	flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM);
-	tdma->flags = flags | CESA_TDMA_IV;
+	tdma->flags = flags | CESA_TDMA_RESULT;
 	return 0;
 }
 
-- 
2.9.3

^ permalink raw reply related

* [PATCH] Fix Kconfig dependencies for FIPS
From: Alec Ari @ 2016-10-04 22:34 UTC (permalink / raw)
  To: linux-crypto

Currently FIPS depends on MODULE_SIG, even if MODULES is disabled.
This change allows the enabling of FIPS without support for modules.

If module loading support is enabled, only then does
FIPS require MODULE_SIG.

Signed-off-by: Alec Ari <neotheuser@gmail.com>
---
  crypto/Kconfig | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 84d7148..fd28805 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -24,7 +24,7 @@ comment "Crypto core or helper"
  config CRYPTO_FIPS
  	bool "FIPS 200 compliance"
  	depends on (CRYPTO_ANSI_CPRNG || CRYPTO_DRBG) && 
!CRYPTO_MANAGER_DISABLE_TESTS
-	depends on MODULE_SIG
+	depends on (MODULE_SIG || !MODULES)
  	help
  	  This options enables the fips boot option which is
  	  required if you want to system to operate in a FIPS 200
-- 
2.7.3

^ permalink raw reply related

* Re: sha1_mb broken
From: Stephan Mueller @ 2016-10-04 16:27 UTC (permalink / raw)
  To: Tim Chen; +Cc: Dey, Megha, linux-crypto@vger.kernel.org
In-Reply-To: <1475597322.3916.283.camel@linux.intel.com>

Am Dienstag, 4. Oktober 2016, 09:08:42 CEST schrieb Tim Chen:

Hi Tim,

> There is a spin lock protecting the completion's wait_queue on the processes
> waiting for the completion of the job, and the queue head.  My suspicion is
> if these structures are not initialized properly, we fail to look up the
> waiting process in the queue properly to call it.  For the other tested
> cases, they may not be a true ahash operation in the sense of passing
> request through the crypto daemon, and have to context switch to let crypto
> daemon complete the job.  The computation proceeds
> and returns in the same call chain.

Thanks a lot for the clarification.

Ciao
Stephan

^ permalink raw reply

* Re: sha1_mb broken
From: Tim Chen @ 2016-10-04 16:08 UTC (permalink / raw)
  To: Stephan Mueller, Dey, Megha; +Cc: linux-crypto@vger.kernel.org
In-Reply-To: <2176107.gSz0A05ekE@tauon.atsec.com>

On Tue, 2016-10-04 at 16:10 +0200, Stephan Mueller wrote:
> Am Dienstag, 4. Oktober 2016, 00:25:07 CEST schrieb Dey, Megha:
> 
> Hi Megha,
> 
> > 
> > 
> > > 
> > > Hi Stephan,
> > > 
> > > Your test code initialized the completion structure incorrectly, that led
> > > to the missing completion from being received. The init_completion call
> > > should be made before the crypto_ahash_digest call. The following change
> Thanks a lot for pointing that one out. Can you help me understand why your 
> code trips over that issue whereas other ahash implementations do not (all 
> other SHA-1 or SHA-2 implementations work perfectly fine with that code)?
> 

There is a spin lock protecting the completion's wait_queue on the processes waiting for
the completion of the job, and the queue head.  My suspicion is if these
structures are not initialized properly, we fail to look up the waiting process in the queue
properly to call it.  For the other tested cases, they may not be a true ahash operation
in the sense of passing request through the crypto daemon, and have to context switch
to let crypto daemon complete the job.  The computation proceeds
and returns in the same call chain.

Thanks.

Tim

^ permalink raw reply

* Re: [PATCH v3 2/2] crypto: marvell - Don't break chain for computable last ahash requests
From: Boris Brezillon @ 2016-10-04 14:14 UTC (permalink / raw)
  To: Romain Perier
  Cc: Arnaud Ebalard, David S. Miller, Herbert Xu, Thomas Petazzoni,
	Jason Cooper, Andrew Lunn, Sebastian Hesselbarth, Gregory Clement,
	Nadav Haklai, Ofer Heifetz, linux-crypto, linux-arm-kernel
In-Reply-To: <20161004125720.3347-3-romain.perier@free-electrons.com>

On Tue,  4 Oct 2016 14:57:20 +0200
Romain Perier <romain.perier@free-electrons.com> wrote:

> Currently, the driver breaks chain for all kind of hash requests in order to
> don't override intermediate states of partial ahash updates. However, some final
> ahash requests can be directly processed by the engine, and so without
> intermediate state. This is typically the case for most for the HMAC requests
> processed via IPSec.
> 
> This commits adds a TDMA descriptor to copy context for these of requests
> into the "op" dma pool, then it allow to chain these requests at the DMA level.
> The 'complete' operation is also updated to retrieve the MAC digest from the
> right location.
> 
> Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
> ---
> 
> Changes in v3:
>  - Copy the whole context back to RAM and not just the digest. Also
>    fixed a rebase issue ^^ (whoops)
> 
> Changes in v2:
>  - Replaced BUG_ON by an error
>  - Add a variable "break_chain", with "type" to break the chain
> 
>    with ahash requests. It improves code readability.
>  drivers/crypto/marvell/hash.c | 79 +++++++++++++++++++++++++++++++++++--------
>  1 file changed, 64 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
> index 9f28468..b36f196 100644
> --- a/drivers/crypto/marvell/hash.c
> +++ b/drivers/crypto/marvell/hash.c
> @@ -312,24 +312,53 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
>  	int i;
>  
>  	digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq));
> -	for (i = 0; i < digsize / 4; i++)
> -		creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i));
>  
> -	if (creq->last_req) {
> +	if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ &&
> +	    !(creq->base.chain.last->flags & CESA_TDMA_BREAK_CHAIN)) {
> +		struct mv_cesa_tdma_desc *tdma = NULL;
> +		__le32 *data = NULL;
> +
> +		for (tdma = creq->base.chain.first; tdma; tdma = tdma->next) {
> +			u32 type = tdma->flags & CESA_TDMA_TYPE_MSK;
> +			if (type ==  CESA_TDMA_RESULT)
> +				break;
> +		}

You should be able to drop the DUMMY desc at the end of the chain and
replace it by the RESULT desc. This way, you won't have to iterate over
the chain to find the TDMA_RESULT element: it should always be the last
desc in the chain.

> +
> +		if (!tdma) {
> +			dev_err(cesa_dev->dev, "Failed to retrieve tdma "
> +					       "descriptor for outer data\n");
> +			return;
> +		}
> +
>  		/*
> -		 * Hardware's MD5 digest is in little endian format, but
> -		 * SHA in big endian format
> +		 * Result is already in the correct endianess when the SA is
> +		 * used
>  		 */
> -		if (creq->algo_le) {
> -			__le32 *result = (void *)ahashreq->result;
> +		data = tdma->op->ctx.hash.hash;
> +		for (i = 0; i < digsize / 4; i++)
> +			creq->state[i] = cpu_to_le32(data[i]);
>  
> -			for (i = 0; i < digsize / 4; i++)
> -				result[i] = cpu_to_le32(creq->state[i]);
> -		} else {
> -			__be32 *result = (void *)ahashreq->result;
> +		memcpy(ahashreq->result, data, digsize);
> +	} else {
> +		for (i = 0; i < digsize / 4; i++)
> +			creq->state[i] = readl_relaxed(engine->regs +
> +						       CESA_IVDIG(i));
> +		if (creq->last_req) {
> +			/*
> +			* Hardware's MD5 digest is in little endian format, but
> +			* SHA in big endian format
> +			*/
> +			if (creq->algo_le) {
> +				__le32 *result = (void *)ahashreq->result;
> +
> +				for (i = 0; i < digsize / 4; i++)
> +					result[i] = cpu_to_le32(creq->state[i]);
> +			} else {
> +				__be32 *result = (void *)ahashreq->result;
>  
> -			for (i = 0; i < digsize / 4; i++)
> -				result[i] = cpu_to_be32(creq->state[i]);
> +				for (i = 0; i < digsize / 4; i++)
> +					result[i] = cpu_to_be32(creq->state[i]);
> +			}
>  		}
>  	}
>  
> @@ -504,6 +533,12 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain,
>  						CESA_SA_DESC_CFG_LAST_FRAG,
>  				      CESA_SA_DESC_CFG_FRAG_MSK);
>  
> +		ret = mv_cesa_dma_add_result_op(chain,
> +						CESA_SA_CFG_SRAM_OFFSET,
> +						CESA_SA_DATA_SRAM_OFFSET,
> +						CESA_TDMA_SRC_IN_SRAM, flags);
> +		if (ret)
> +			return ERR_PTR(-ENOMEM);
>  		return op;
>  	}
>  
> @@ -564,6 +599,8 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
>  	struct mv_cesa_op_ctx *op = NULL;
>  	unsigned int frag_len;
>  	int ret;
> +	u32 type;
> +	bool break_chain = true;
>  
>  	basereq->chain.first = NULL;
>  	basereq->chain.last = NULL;
> @@ -635,6 +672,16 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
>  		goto err_free_tdma;
>  	}
>  
> +	/*
> +	 * If results are copied via DMA, this means that this
> +	 * request can be directly processed by the engine,
> +	 * without partial updates. So we can chain it at the
> +	 * DMA level with other requests.
> +	 */
> +	type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK;
> +	if (type == CESA_TDMA_RESULT)
> +		break_chain = false;
> +
>  	if (op) {
>  		/* Add dummy desc to wait for crypto operation end */
>  		ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags);
> @@ -648,8 +695,10 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req)
>  	else
>  		creq->cache_ptr = 0;
>  
> -	basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ |
> -				       CESA_TDMA_BREAK_CHAIN);
> +	basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ;
> +
> +	if (break_chain)
> +		basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN;

Not sure this break_chain variable is really needed. You can directly
test the type of the last element in the TDMA chain here and if it's
!= CESA_TDMA_RESULT, pass the CESA_TDMA_BREAK_CHAIN flag.

>  
>  	return 0;
>  

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox