Linux cryptographic layer development
 help / color / mirror / Atom feed
* [PATCH 09/10] crypto: caam - refactor ahash shared descriptor generation
From: Horia Geantă @ 2016-11-22 13:44 UTC (permalink / raw)
  To: Herbert Xu
  Cc: David S. Miller, linux-crypto, Dan Douglass, Tudor Ambarus,
	Alexandru Porosanu
In-Reply-To: <1479822252-23833-1-git-send-email-horia.geanta@nxp.com>

Move ahash shared descriptor generation into a single function.
Currently there is no plan to support ahash on any other interface
besides the Job Ring, thus for now the functionality is not exported.

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
 drivers/crypto/caam/caamhash.c | 130 ++++++++++++-----------------------------
 1 file changed, 36 insertions(+), 94 deletions(-)

diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 505007d0277c..e58639ea53b1 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -217,86 +217,54 @@ static inline int ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev,
 	return 0;
 }
 
-/* Common shared descriptor commands */
-static inline void append_key_ahash(u32 *desc, struct caam_hash_ctx *ctx)
-{
-	append_key_as_imm(desc, ctx->key, ctx->adata.keylen_pad,
-			  ctx->adata.keylen, CLASS_2 | KEY_DEST_MDHA_SPLIT |
-			  KEY_ENC);
-}
-
-/* Append key if it has been set */
-static inline void init_sh_desc_key_ahash(u32 *desc, struct caam_hash_ctx *ctx)
-{
-	u32 *key_jump_cmd;
-
-	init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-	if (ctx->adata.keylen) {
-		/* Skip if already shared */
-		key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
-					   JUMP_COND_SHRD);
-
-		append_key_ahash(desc, ctx);
-
-		set_jump_tgt_here(desc, key_jump_cmd);
-	}
-}
-
 /*
- * For ahash read data from seqin following state->caam_ctx,
- * and write resulting class2 context to seqout, which may be state->caam_ctx
- * or req->result
+ * For ahash update, final and finup (import_ctx = true)
+ *     import context, read and write to seqout
+ * For ahash firsts and digest (import_ctx = false)
+ *     read and write to seqout
  */
-static inline void ahash_append_load_str(u32 *desc, int digestsize)
+static inline void ahash_gen_sh_desc(u32 *desc, u32 state, int digestsize,
+				     struct caam_hash_ctx *ctx, bool import_ctx)
 {
-	/* Calculate remaining bytes to read */
-	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+	u32 op = ctx->adata.algtype;
+	u32 *skip_key_load;
 
-	/* Read remaining bytes */
-	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 |
-			     FIFOLD_TYPE_MSG | KEY_VLF);
-
-	/* Store class2 context bytes */
-	append_seq_store(desc, digestsize, LDST_CLASS_2_CCB |
-			 LDST_SRCDST_BYTE_CONTEXT);
-}
+	init_sh_desc(desc, HDR_SHARE_SERIAL);
 
-/*
- * For ahash update, final and finup, import context, read and write to seqout
- */
-static inline void ahash_ctx_data_to_out(u32 *desc, u32 op, u32 state,
-					 int digestsize,
-					 struct caam_hash_ctx *ctx)
-{
-	init_sh_desc_key_ahash(desc, ctx);
+	/* Append key if it has been set; ahash update excluded */
+	if ((state != OP_ALG_AS_UPDATE) && (ctx->adata.keylen)) {
+		/* Skip key loading if already shared */
+		skip_key_load = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+					    JUMP_COND_SHRD);
 
-	/* Import context from software */
-	append_seq_load(desc, ctx->ctx_len, LDST_CLASS_2_CCB |
-			LDST_SRCDST_BYTE_CONTEXT);
+		append_key_as_imm(desc, ctx->key, ctx->adata.keylen_pad,
+				  ctx->adata.keylen, CLASS_2 |
+				  KEY_DEST_MDHA_SPLIT | KEY_ENC);
 
-	/* Class 2 operation */
-	append_operation(desc, op | state | OP_ALG_ENCRYPT);
+		set_jump_tgt_here(desc, skip_key_load);
 
-	/*
-	 * Load from buf and/or src and write to req->result or state->context
-	 */
-	ahash_append_load_str(desc, digestsize);
-}
+		op |= OP_ALG_AAI_HMAC_PRECOMP;
+	}
 
-/* For ahash firsts and digest, read and write to seqout */
-static inline void ahash_data_to_out(u32 *desc, u32 op, u32 state,
-				     int digestsize, struct caam_hash_ctx *ctx)
-{
-	init_sh_desc_key_ahash(desc, ctx);
+	/* If needed, import context from software */
+	if (import_ctx)
+		append_seq_load(desc, ctx->ctx_len, LDST_CLASS_2_CCB |
+				LDST_SRCDST_BYTE_CONTEXT);
 
 	/* Class 2 operation */
 	append_operation(desc, op | state | OP_ALG_ENCRYPT);
 
 	/*
 	 * Load from buf and/or src and write to req->result or state->context
+	 * Calculate remaining bytes to read
 	 */
-	ahash_append_load_str(desc, digestsize);
+	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+	/* Read remaining bytes */
+	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 |
+			     FIFOLD_TYPE_MSG | KEY_VLF);
+	/* Store class2 context bytes */
+	append_seq_store(desc, digestsize, LDST_CLASS_2_CCB |
+			 LDST_SRCDST_BYTE_CONTEXT);
 }
 
 static int ahash_set_sh_desc(struct crypto_ahash *ahash)
@@ -304,28 +272,11 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct device *jrdev = ctx->jrdev;
-	u32 have_key = 0;
 	u32 *desc;
 
-	if (ctx->adata.keylen)
-		have_key = OP_ALG_AAI_HMAC_PRECOMP;
-
 	/* ahash_update shared descriptor */
 	desc = ctx->sh_desc_update;
-
-	init_sh_desc(desc, HDR_SHARE_SERIAL);
-
-	/* Import context from software */
-	append_seq_load(desc, ctx->ctx_len, LDST_CLASS_2_CCB |
-			LDST_SRCDST_BYTE_CONTEXT);
-
-	/* Class 2 operation */
-	append_operation(desc, ctx->adata.algtype | OP_ALG_AS_UPDATE |
-			 OP_ALG_ENCRYPT);
-
-	/* Load data and write to result or context */
-	ahash_append_load_str(desc, ctx->ctx_len);
-
+	ahash_gen_sh_desc(desc, OP_ALG_AS_UPDATE, ctx->ctx_len, ctx, true);
 	ctx->sh_desc_update_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
 						 DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, ctx->sh_desc_update_dma)) {
@@ -340,10 +291,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
 	/* ahash_update_first shared descriptor */
 	desc = ctx->sh_desc_update_first;
-
-	ahash_data_to_out(desc, have_key | ctx->adata.algtype, OP_ALG_AS_INIT,
-			  ctx->ctx_len, ctx);
-
+	ahash_gen_sh_desc(desc, OP_ALG_AS_INIT, ctx->ctx_len, ctx, false);
 	ctx->sh_desc_update_first_dma = dma_map_single(jrdev, desc,
 						       desc_bytes(desc),
 						       DMA_TO_DEVICE);
@@ -359,10 +307,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
 	/* ahash_final shared descriptor */
 	desc = ctx->sh_desc_fin;
-
-	ahash_ctx_data_to_out(desc, have_key | ctx->adata.algtype,
-			      OP_ALG_AS_FINALIZE, digestsize, ctx);
-
+	ahash_gen_sh_desc(desc, OP_ALG_AS_FINALIZE, digestsize, ctx, true);
 	ctx->sh_desc_fin_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
 					      DMA_TO_DEVICE);
 	if (dma_mapping_error(jrdev, ctx->sh_desc_fin_dma)) {
@@ -377,10 +322,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
 	/* ahash_digest shared descriptor */
 	desc = ctx->sh_desc_digest;
-
-	ahash_data_to_out(desc, have_key | ctx->adata.algtype,
-			  OP_ALG_AS_INITFINAL, digestsize, ctx);
-
+	ahash_gen_sh_desc(desc, OP_ALG_AS_INITFINAL, digestsize, ctx, false);
 	ctx->sh_desc_digest_dma = dma_map_single(jrdev, desc,
 						 desc_bytes(desc),
 						 DMA_TO_DEVICE);
-- 
2.4.4

^ permalink raw reply related

* [PATCH 05/10] crypto: caam - rewrite some generic inline append cmds
From: Horia Geantă @ 2016-11-22 13:44 UTC (permalink / raw)
  To: Herbert Xu
  Cc: David S. Miller, linux-crypto, Dan Douglass, Tudor Ambarus,
	Alexandru Porosanu
In-Reply-To: <1479822252-23833-1-git-send-email-horia.geanta@nxp.com>

A few descriptor commands are generated using generic
inline append "append_cmd" function.
Rewrite them using specific inline append functions.

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
 drivers/crypto/caam/caamalg.c  | 20 ++++++++++----------
 drivers/crypto/caam/caamhash.c |  8 ++++----
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 5f332df1a8e6..9cb95f5b2eb3 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -1853,9 +1853,9 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 			  ctx->cdata.keylen, CLASS_1 | KEY_DEST_CLASS_REG);
 
 	/* Load sector size with index 40 bytes (0x28) */
-	append_cmd(desc, CMD_LOAD | IMMEDIATE | LDST_SRCDST_BYTE_CONTEXT |
-		   LDST_CLASS_1_CCB | (0x28 << LDST_OFFSET_SHIFT) | 8);
-	append_data(desc, (void *)&sector_size, 8);
+	append_load_as_imm(desc, (void *)&sector_size, 8, LDST_CLASS_1_CCB |
+			   LDST_SRCDST_BYTE_CONTEXT |
+			   (0x28 << LDST_OFFSET_SHIFT));
 
 	set_jump_tgt_here(desc, key_jump_cmd);
 
@@ -1864,8 +1864,8 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	 * Upper 8B of IV - will be used as sector index
 	 * Lower 8B of IV - will be discarded
 	 */
-	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
-		   LDST_CLASS_1_CCB | (0x20 << LDST_OFFSET_SHIFT) | 8);
+	append_seq_load(desc, 8, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB |
+			(0x20 << LDST_OFFSET_SHIFT));
 	append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
 
 	/* Load operation */
@@ -1900,9 +1900,9 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 			  ctx->cdata.keylen, CLASS_1 | KEY_DEST_CLASS_REG);
 
 	/* Load sector size with index 40 bytes (0x28) */
-	append_cmd(desc, CMD_LOAD | IMMEDIATE | LDST_SRCDST_BYTE_CONTEXT |
-		   LDST_CLASS_1_CCB | (0x28 << LDST_OFFSET_SHIFT) | 8);
-	append_data(desc, (void *)&sector_size, 8);
+	append_load_as_imm(desc, (void *)&sector_size, 8, LDST_CLASS_1_CCB |
+			   LDST_SRCDST_BYTE_CONTEXT |
+			   (0x28 << LDST_OFFSET_SHIFT));
 
 	set_jump_tgt_here(desc, key_jump_cmd);
 
@@ -1911,8 +1911,8 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher,
 	 * Upper 8B of IV - will be used as sector index
 	 * Lower 8B of IV - will be discarded
 	 */
-	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
-		   LDST_CLASS_1_CCB | (0x20 << LDST_OFFSET_SHIFT) | 8);
+	append_seq_load(desc, 8, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB |
+			(0x20 << LDST_OFFSET_SHIFT));
 	append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
 
 	/* Load operation */
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 8e4530d68208..d3f0ae16a73b 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -272,8 +272,8 @@ static inline void ahash_ctx_data_to_out(u32 *desc, u32 op, u32 state,
 	init_sh_desc_key_ahash(desc, ctx);
 
 	/* Import context from software */
-	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
-		   LDST_CLASS_2_CCB | ctx->ctx_len);
+	append_seq_load(desc, ctx->ctx_len, LDST_CLASS_2_CCB |
+			LDST_SRCDST_BYTE_CONTEXT);
 
 	/* Class 2 operation */
 	append_operation(desc, op | state | OP_ALG_ENCRYPT);
@@ -316,8 +316,8 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 	init_sh_desc(desc, HDR_SHARE_SERIAL);
 
 	/* Import context from software */
-	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT |
-		   LDST_CLASS_2_CCB | ctx->ctx_len);
+	append_seq_load(desc, ctx->ctx_len, LDST_CLASS_2_CCB |
+			LDST_SRCDST_BYTE_CONTEXT);
 
 	/* Class 2 operation */
 	append_operation(desc, ctx->adata.algtype | OP_ALG_AS_UPDATE |
-- 
2.4.4

^ permalink raw reply related

* [PATCH 04/10] crypto: caam - improve key inlining
From: Horia Geantă @ 2016-11-22 13:44 UTC (permalink / raw)
  To: Herbert Xu
  Cc: David S. Miller, linux-crypto, Dan Douglass, Tudor Ambarus,
	Alexandru Porosanu
In-Reply-To: <1479822252-23833-1-git-send-email-horia.geanta@nxp.com>

For authenc / stitched AEAD algorithms, check independently
each of the two (authentication, encryption) keys whether inlining
is possible.
Prioritize the inlining of the authentication key, since the length
of the (split) key is bigger than that of the encryption key.

For the other algorithms, compute only once per tfm the remaining
available bytes and decide whether key inlining is possible
based on this.

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
---
 drivers/crypto/caam/caamalg.c     | 130 ++++++++++++++++++++++----------------
 drivers/crypto/caam/desc_constr.h |  39 ++++++++++++
 2 files changed, 116 insertions(+), 53 deletions(-)

diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 48fc000d86bf..5f332df1a8e6 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -224,7 +224,7 @@ struct caam_ctx {
 };
 
 static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx,
-				  int keys_fit_inline, bool is_rfc3686)
+				  bool is_rfc3686)
 {
 	u32 *key_jump_cmd;
 	unsigned int enckeylen = ctx->cdata.keylen;
@@ -244,18 +244,20 @@ static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx,
 	if (is_rfc3686)
 		enckeylen -= CTR_RFC3686_NONCE_SIZE;
 
-	if (keys_fit_inline) {
+	if (ctx->adata.key_inline)
 		append_key_as_imm(desc, (void *)ctx->adata.key,
 				  ctx->adata.keylen_pad, ctx->adata.keylen,
 				  CLASS_2 | KEY_DEST_MDHA_SPLIT | KEY_ENC);
-		append_key_as_imm(desc, (void *)ctx->cdata.key, enckeylen,
-				  enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
-	} else {
+	else
 		append_key(desc, ctx->adata.key, ctx->adata.keylen, CLASS_2 |
 			   KEY_DEST_MDHA_SPLIT | KEY_ENC);
+
+	if (ctx->cdata.key_inline)
+		append_key_as_imm(desc, (void *)ctx->cdata.key, enckeylen,
+				  enckeylen, CLASS_1 | KEY_DEST_CLASS_REG);
+	else
 		append_key(desc, ctx->cdata.key, enckeylen, CLASS_1 |
 			   KEY_DEST_CLASS_REG);
-	}
 
 	/* Load Counter into CONTEXT1 reg */
 	if (is_rfc3686) {
@@ -282,13 +284,14 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 	struct device *jrdev = ctx->jrdev;
 	u32 *key_jump_cmd, *jump_cmd, *read_move_cmd, *write_move_cmd;
 	u32 *desc;
+	int rem_bytes = CAAM_DESC_BYTES_MAX - AEAD_DESC_JOB_IO_LEN -
+			ctx->adata.keylen_pad;
 
 	/*
 	 * Job Descriptor and Shared Descriptors
 	 * must all fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_AEAD_NULL_ENC_LEN + AEAD_DESC_JOB_IO_LEN +
-	    ctx->adata.keylen_pad <= CAAM_DESC_BYTES_MAX) {
+	if (rem_bytes >= DESC_AEAD_NULL_ENC_LEN) {
 		ctx->adata.key_inline = true;
 		ctx->adata.key = (uintptr_t)ctx->key;
 	} else {
@@ -368,8 +371,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptors
 	 * must all fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_AEAD_NULL_DEC_LEN + DESC_JOB_IO_LEN +
-	    ctx->adata.keylen_pad <= CAAM_DESC_BYTES_MAX) {
+	if (rem_bytes >= DESC_AEAD_NULL_DEC_LEN) {
 		ctx->adata.key_inline = true;
 		ctx->adata.key = (uintptr_t)ctx->key;
 	} else {
@@ -463,10 +465,11 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
-	bool keys_fit_inline;
 	u32 geniv, moveiv;
 	u32 ctx1_iv_off = 0;
 	u32 *desc;
+	u32 inl_mask;
+	unsigned int data_len[2];
 	const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
 			       OP_ALG_AAI_CTR_MOD128);
 	const bool is_rfc3686 = alg->caam.rfc3686;
@@ -493,6 +496,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	if (is_rfc3686)
 		ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE;
 
+	data_len[0] = ctx->adata.keylen_pad;
+	data_len[1] = ctx->cdata.keylen;
+
 	if (alg->caam.geniv)
 		goto skip_enc;
 
@@ -500,24 +506,30 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptors
 	 * must all fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_AEAD_ENC_LEN + AUTHENC_DESC_JOB_IO_LEN +
-	    ctx->adata.keylen_pad + ctx->cdata.keylen +
-	    (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <=
-	    CAAM_DESC_BYTES_MAX) {
-		keys_fit_inline = true;
+	if (desc_inline_query(DESC_AEAD_ENC_LEN +
+			      (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0),
+			      AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask,
+			      ARRAY_SIZE(data_len)) < 0)
+		return -EINVAL;
+
+	if (inl_mask & 1)
 		ctx->adata.key = (uintptr_t)ctx->key;
-		ctx->cdata.key = (uintptr_t)(ctx->key + ctx->adata.keylen_pad);
-	} else {
-		keys_fit_inline = false;
+	else
 		ctx->adata.key = ctx->key_dma;
+
+	if (inl_mask & 2)
+		ctx->cdata.key = (uintptr_t)(ctx->key + ctx->adata.keylen_pad);
+	else
 		ctx->cdata.key = ctx->key_dma + ctx->adata.keylen_pad;
-	}
+
+	ctx->adata.key_inline = !!(inl_mask & 1);
+	ctx->cdata.key_inline = !!(inl_mask & 2);
 
 	/* aead_encrypt shared descriptor */
 	desc = ctx->sh_desc_enc;
 
 	/* Note: Context registers are saved. */
-	init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
+	init_sh_desc_key_aead(desc, ctx, is_rfc3686);
 
 	/* Class 2 operation */
 	append_operation(desc, ctx->adata.algtype | OP_ALG_AS_INITFINAL |
@@ -572,24 +584,30 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptors
 	 * must all fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_AEAD_DEC_LEN + AUTHENC_DESC_JOB_IO_LEN +
-	    ctx->adata.keylen_pad + ctx->cdata.keylen +
-	    (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <=
-	    CAAM_DESC_BYTES_MAX) {
-		keys_fit_inline = true;
+	if (desc_inline_query(DESC_AEAD_DEC_LEN +
+			      (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0),
+			      AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask,
+			      ARRAY_SIZE(data_len)) < 0)
+		return -EINVAL;
+
+	if (inl_mask & 1)
 		ctx->adata.key = (uintptr_t)ctx->key;
-		ctx->cdata.key = (uintptr_t)(ctx->key + ctx->adata.keylen_pad);
-	} else {
-		keys_fit_inline = false;
+	else
 		ctx->adata.key = ctx->key_dma;
+
+	if (inl_mask & 2)
+		ctx->cdata.key = (uintptr_t)(ctx->key + ctx->adata.keylen_pad);
+	else
 		ctx->cdata.key = ctx->key_dma + ctx->adata.keylen_pad;
-	}
+
+	ctx->adata.key_inline = !!(inl_mask & 1);
+	ctx->cdata.key_inline = !!(inl_mask & 2);
 
 	/* aead_decrypt shared descriptor */
 	desc = ctx->sh_desc_dec;
 
 	/* Note: Context registers are saved. */
-	init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
+	init_sh_desc_key_aead(desc, ctx, is_rfc3686);
 
 	/* Class 2 operation */
 	append_operation(desc, ctx->adata.algtype | OP_ALG_AS_INITFINAL |
@@ -660,24 +678,30 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptors
 	 * must all fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_AEAD_GIVENC_LEN + AUTHENC_DESC_JOB_IO_LEN +
-	    ctx->adata.keylen_pad + ctx->cdata.keylen +
-	    (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <=
-	    CAAM_DESC_BYTES_MAX) {
-		keys_fit_inline = true;
+	if (desc_inline_query(DESC_AEAD_GIVENC_LEN +
+			      (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0),
+			      AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask,
+			      ARRAY_SIZE(data_len)) < 0)
+		return -EINVAL;
+
+	if (inl_mask & 1)
 		ctx->adata.key = (uintptr_t)ctx->key;
-		ctx->cdata.key = (uintptr_t)(ctx->key + ctx->adata.keylen_pad);
-	} else {
-		keys_fit_inline = false;
+	else
 		ctx->adata.key = ctx->key_dma;
+
+	if (inl_mask & 2)
+		ctx->cdata.key = (uintptr_t)(ctx->key + ctx->adata.keylen_pad);
+	else
 		ctx->cdata.key = ctx->key_dma + ctx->adata.keylen_pad;
-	}
+
+	ctx->adata.key_inline = !!(inl_mask & 1);
+	ctx->cdata.key_inline = !!(inl_mask & 2);
 
 	/* aead_givencrypt shared descriptor */
 	desc = ctx->sh_desc_enc;
 
 	/* Note: Context registers are saved. */
-	init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
+	init_sh_desc_key_aead(desc, ctx, is_rfc3686);
 
 	if (is_rfc3686)
 		goto copy_iv;
@@ -787,6 +811,8 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	u32 *key_jump_cmd, *zero_payload_jump_cmd,
 	    *zero_assoc_jump_cmd1, *zero_assoc_jump_cmd2;
 	u32 *desc;
+	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
 
 	if (!ctx->cdata.keylen || !ctx->authsize)
 		return 0;
@@ -796,8 +822,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptor
 	 * must fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_GCM_ENC_LEN + GCM_DESC_JOB_IO_LEN +
-	    ctx->cdata.keylen <= CAAM_DESC_BYTES_MAX) {
+	if (rem_bytes >= DESC_GCM_ENC_LEN) {
 		ctx->cdata.key_inline = true;
 		ctx->cdata.key = (uintptr_t)ctx->key;
 	} else {
@@ -895,8 +920,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptors
 	 * must all fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_GCM_DEC_LEN + GCM_DESC_JOB_IO_LEN +
-	    ctx->cdata.keylen <= CAAM_DESC_BYTES_MAX) {
+	if (rem_bytes >= DESC_GCM_DEC_LEN) {
 		ctx->cdata.key_inline = true;
 		ctx->cdata.key = (uintptr_t)ctx->key;
 	} else {
@@ -996,6 +1020,8 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	struct device *jrdev = ctx->jrdev;
 	u32 *key_jump_cmd;
 	u32 *desc;
+	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
 
 	if (!ctx->cdata.keylen || !ctx->authsize)
 		return 0;
@@ -1005,8 +1031,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptor
 	 * must fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_RFC4106_ENC_LEN + GCM_DESC_JOB_IO_LEN +
-	    ctx->cdata.keylen <= CAAM_DESC_BYTES_MAX) {
+	if (rem_bytes >= DESC_RFC4106_ENC_LEN) {
 		ctx->cdata.key_inline = true;
 		ctx->cdata.key = (uintptr_t)ctx->key;
 	} else {
@@ -1084,8 +1109,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptors
 	 * must all fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_RFC4106_DEC_LEN + DESC_JOB_IO_LEN +
-	    ctx->cdata.keylen <= CAAM_DESC_BYTES_MAX) {
+	if (rem_bytes >= DESC_RFC4106_DEC_LEN) {
 		ctx->cdata.key_inline = true;
 		ctx->cdata.key = (uintptr_t)ctx->key;
 	} else {
@@ -1180,6 +1204,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	u32 *key_jump_cmd;
 	u32 *read_move_cmd, *write_move_cmd;
 	u32 *desc;
+	int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN -
+			ctx->cdata.keylen;
 
 	if (!ctx->cdata.keylen || !ctx->authsize)
 		return 0;
@@ -1189,8 +1215,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptor
 	 * must fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_RFC4543_ENC_LEN + GCM_DESC_JOB_IO_LEN +
-	    ctx->cdata.keylen <= CAAM_DESC_BYTES_MAX) {
+	if (rem_bytes >= DESC_RFC4543_ENC_LEN) {
 		ctx->cdata.key_inline = true;
 		ctx->cdata.key = (uintptr_t)ctx->key;
 	} else {
@@ -1267,8 +1292,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	 * Job Descriptor and Shared Descriptors
 	 * must all fit into the 64-word Descriptor h/w Buffer
 	 */
-	if (DESC_RFC4543_DEC_LEN + GCM_DESC_JOB_IO_LEN +
-	    ctx->cdata.keylen <= CAAM_DESC_BYTES_MAX) {
+	if (rem_bytes >= DESC_RFC4543_DEC_LEN) {
 		ctx->cdata.key_inline = true;
 		ctx->cdata.key = (uintptr_t)ctx->key;
 	} else {
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h
index bfef7952dfb7..fa70c0d79c40 100644
--- a/drivers/crypto/caam/desc_constr.h
+++ b/drivers/crypto/caam/desc_constr.h
@@ -449,3 +449,42 @@ struct alginfo {
 	u64 key;
 	bool key_inline;
 };
+
+/**
+ * desc_inline_query() - Provide indications on which data items can be inlined
+ *                       and which shall be referenced in a shared descriptor.
+ * @sd_base_len: Shared descriptor base length - bytes consumed by the commands,
+ *               excluding the data items to be inlined (or corresponding
+ *               pointer if an item is not inlined). Each cnstr_* function that
+ *               generates descriptors should have a define mentioning
+ *               corresponding length.
+ * @jd_len: Maximum length of the job descriptor(s) that will be used
+ *          together with the shared descriptor.
+ * @data_len: Array of lengths of the data items trying to be inlined
+ * @inl_mask: 32bit mask with bit x = 1 if data item x can be inlined, 0
+ *            otherwise.
+ * @count: Number of data items (size of @data_len array); must be <= 32
+ *
+ * Return: 0 if data can be inlined / referenced, negative value if not. If 0,
+ *         check @inl_mask for details.
+ */
+static inline int desc_inline_query(unsigned int sd_base_len,
+				    unsigned int jd_len, unsigned int *data_len,
+				    u32 *inl_mask, unsigned int count)
+{
+	int rem_bytes = (int)(CAAM_DESC_BYTES_MAX - sd_base_len - jd_len);
+	unsigned int i;
+
+	*inl_mask = 0;
+	for (i = 0; (i < count) && (rem_bytes > 0); i++) {
+		if (rem_bytes - (int)(data_len[i] +
+			(count - i - 1) * CAAM_PTR_SZ) >= 0) {
+			rem_bytes -= data_len[i];
+			*inl_mask |= (1 << i);
+		} else {
+			rem_bytes -= CAAM_PTR_SZ;
+		}
+	}
+
+	return (rem_bytes >= 0) ? 0 : -1;
+}
-- 
2.4.4

^ permalink raw reply related

* Re: [PATCH v3] arm64/crypto: Accelerated CRC T10 DIF computation
From: Ard Biesheuvel @ 2016-11-22 13:38 UTC (permalink / raw)
  To: YueHaibing
  Cc: Herbert Xu, David S. Miller, Catalin Marinas, Will Deacon,
	linux-kernel@vger.kernel.org, Hanjun Guo, dingtinahong,
	yangshengkai, linux-arm-kernel@lists.infradead.org,
	linux-crypto@vger.kernel.org
In-Reply-To: <CAKv+Gu9x+Z6Wx_jXQvFBuD4nQFF7rhz7M0KZmPWqUgfsfVz-OA@mail.gmail.com>

On 22 November 2016 at 12:53, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 22 November 2016 at 10:14, YueHaibing <yuehaibing@huawei.com> wrote:
>> This is the ARM64 CRC T10 DIF transform accelerated with the ARMv8
>> NEON instruction.The config CRYPTO_CRCT10DIF_NEON should be turned
>> on to enable the feature.The crc_t10dif crypto library function will
>> use this faster algorithm when crct10dif_neon module is loaded.
>>
>
> What is this algorithm commonly used for? In other words, why is it a
> good idea to add support for this algorithm to the kernel?
>
>> Tcrypt benchmark results:
>>
>> HIP06  (mode=320 sec=2)
>>
>> The ratio of bytes/sec crct10dif-neon Vs. crct10dif-generic:
>>
>>                         TEST                              neon          generic         ratio
>>   16 byte blocks,   16 bytes per update,   1 updates    214506112       171095400       1.25
>>   64 byte blocks,   16 bytes per update,   4 updates    139385312       119036352       1.17
>>   64 byte blocks,   64 bytes per update,   1 updates    671523712       198945344       3.38
>>  256 byte blocks,   16 bytes per update,  16 updates    157674880       125146752       1.26
>>  256 byte blocks,   64 bytes per update,   4 updates    491888128       175764096       2.80
>>  256 byte blocks,  256 bytes per update,   1 updates    2123298176      206995200       10.26
>> 1024 byte blocks,   16 bytes per update,  64 updates    161243136       126460416       1.28
>> 1024 byte blocks,  256 bytes per update,   4 updates    1643020800      200027136       8.21
>> 1024 byte blocks, 1024 bytes per update,   1 updates    4238239232      209106432       20.27
>> 2048 byte blocks,   16 bytes per update, 128 updates    162079744       126953472       1.28
>> 2048 byte blocks,  256 bytes per update,   8 updates    1693587456      200867840       8.43
>> 2048 byte blocks, 1024 bytes per update,   2 updates    3424323584      206330880       16.60
>> 2048 byte blocks, 2048 bytes per update,   1 updates    5228207104      208620544       25.06
>> 4096 byte blocks,   16 bytes per update, 256 updates    162304000       126894080       1.28
>> 4096 byte blocks,  256 bytes per update,  16 updates    1731862528      201197568       8.61
>> 4096 byte blocks, 1024 bytes per update,   4 updates    3668625408      207003648       17.72
>> 4096 byte blocks, 4096 bytes per update,   1 updates    5551239168      209127424       26.54
>> 8192 byte blocks,   16 bytes per update, 512 updates    162779136       126984192       1.28
>> 8192 byte blocks,  256 bytes per update,  32 updates    1753702400      201420800       8.71
>> 8192 byte blocks, 1024 bytes per update,   8 updates    3760918528      207351808       18.14
>> 8192 byte blocks, 4096 bytes per update,   2 updates    5483655168      208928768       26.25
>> 8192 byte blocks, 8192 bytes per update,   1 updates    5623377920      209108992       26.89
>>
>> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
>> Signed-off-by: YangShengkai <yangshengkai@huawei.com>
>> Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
>> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
>>
>> ---
>>  arch/arm64/crypto/Kconfig                 |   5 +
>>  arch/arm64/crypto/Makefile                |   4 +
>>  arch/arm64/crypto/crct10dif-neon-asm_64.S | 751 ++++++++++++++++++++++++++++++
>>  arch/arm64/crypto/crct10dif-neon_glue.c   | 115 +++++
>>  4 files changed, 875 insertions(+)
>>  create mode 100644 arch/arm64/crypto/crct10dif-neon-asm_64.S
>>  create mode 100644 arch/arm64/crypto/crct10dif-neon_glue.c
>>
>> diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
>> index 2cf32e9..2e450bf 100644
>> --- a/arch/arm64/crypto/Kconfig
>> +++ b/arch/arm64/crypto/Kconfig
>> @@ -23,6 +23,11 @@ config CRYPTO_GHASH_ARM64_CE
>>         depends on ARM64 && KERNEL_MODE_NEON
>>         select CRYPTO_HASH
>>
>> +config CRYPTO_CRCT10DIF_NEON
>> +       tristate "CRCT10DIF hardware acceleration using NEON instructions"
>> +       depends on ARM64 && KERNEL_MODE_NEON
>> +       select CRYPTO_HASH
>> +
>
> Could you please follow the existing pattern:
>
> config CRYPTO_CRCT10DIF_ARM64_NEON
>
>
>>  config CRYPTO_AES_ARM64_CE
>>         tristate "AES core cipher using ARMv8 Crypto Extensions"
>>         depends on ARM64 && KERNEL_MODE_NEON
>> diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
>> index abb79b3..6c9ff2c 100644
>> --- a/arch/arm64/crypto/Makefile
>> +++ b/arch/arm64/crypto/Makefile
>> @@ -29,6 +29,10 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o
>>  obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
>>  aes-neon-blk-y := aes-glue-neon.o aes-neon.o
>>
>> +obj-$(CONFIG_CRYPTO_CRCT10DIF_NEON) += crct10dif-neon.o
>> +crct10dif-neon-y := crct10dif-neon-asm_64.o crct10dif-neon_glue.o
>> +AFLAGS_crct10dif-neon-asm_64.o := -march=armv8-a+crypto
>> +
>
> Please drop this line, and add
>
> .cpu generic+crypto
>
> to the .S file
>
>>  AFLAGS_aes-ce.o                := -DINTERLEAVE=4
>>  AFLAGS_aes-neon.o      := -DINTERLEAVE=4
>>
>> diff --git a/arch/arm64/crypto/crct10dif-neon-asm_64.S b/arch/arm64/crypto/crct10dif-neon-asm_64.S
>> new file mode 100644
>> index 0000000..2ae3033
>> --- /dev/null
>> +++ b/arch/arm64/crypto/crct10dif-neon-asm_64.S
>> @@ -0,0 +1,751 @@
>> +/*
>> + * Copyright (c) 2016-2017 Hisilicon Limited.
>> + *
>
> Please drop the 2017 here.
>
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + */
>> +
>> +#include <linux/linkage.h>
>> +#include <asm/assembler.h>
>> +
>> +.global crc_t10dif_neon
>
> Please drop this .global, and use ENTRY() below
>
>> +.text
>> +
>> +/* X0 is initial CRC value
>> + * X1 is data buffer
>> + * X2 is the length of buffer
>> + * X3 is the backup buffer(for extend)
>> + * X4 for other extend parameter(for extend)
>> + * Q0, Q1, Q2, Q3 maybe as parameter for other functions,
>> + * the value of Q0, Q1, Q2, Q3 maybe modified.
>> + *
>> + * suggestion:
>> + * 1. dont use general purpose register for calculation
>> + * 2. set data endianness outside of the kernel
>> + * 3. use ext as shifting around
>> + * 4. dont use LD3/LD4, ST3/ST4
>> + */
>> +
>
>
> Whose suggestions are these, and why? I do appreciate comments like
> this, but only if I can learn something from it
>
>> +crc_t10dif_neon:
>
> ENTRY()
>
>> +       /* push the register to stack that CRC16 will use */
>> +       STP             X5, X6, [sp, #-0x10]!
>
> Please use an ordinary stack frame, i.e.,
>
> stp   x29, x30, [sp, #-xxx]!
> mov x29, sp
>
> where xxx is the entire allocation you need for stacking callee save registers
>
>> +       STP             X7, X8, [sp, #-0x10]!
>> +       STP             X9, X10, [sp, #-0x10]!
>> +       STP             X11, X12, [sp, #-0x10]!
>> +       STP             X13, X14, [sp, #-0x10]!
>
> These are not callee save, so no need to stack them
>
>> +       STP             Q10, Q11, [sp, #-0x20]!
>> +       STP             Q12, Q13, [sp, #-0x20]!
>> +       STP             Q4, Q5, [sp, #-0x20]!
>> +       STP             Q6, Q7, [sp, #-0x20]!
>> +       STP             Q8, Q9, [sp, #-0x20]!
>> +       STP             Q14, Q15, [sp, #-0x20]!
>> +       STP             Q16, Q17, [sp, #-0x20]!
>> +       STP             Q18, Q19, [sp, #-0x20]!
>> +
>
> What is the point of stacking the NEON registers? Also, as a general
> note, could you switch to lower case throughout the file?
>
>
>> +       SUB             sp,sp,#0x20
>> +
>
> Please account for locals in the allocation above. Only outgoing
> arguments should be allocated below the frame pointer
>
>
>> +       MOV             X11, #0         // PUSH STACK FLAG
>> +
>
> What does this comment mean?
>
>> +       CMP             X2, #0x80
>> +       B.LT            2f              // _less_than_128, <128
>> +
>
> Redundant comment
>
>> +       /* V10/V11/V12/V13      is 128bit.
>> +        * we get data 512bit( by cacheline ) each time
>> +        */
>> +       LDP             Q10, Q11, [X1], #0x20
>> +       LDP             Q12, Q13, [X1], #0x20
>> +
>> +       /* move the initial value to V6 register */
>> +       LSL             X0, X0, #48
>> +       EOR             V6.16B, V6.16B, V6.16B
>> +       MOV             V6.D[1], X0
>> +
>> +       /* big-little end change. because the data in memory is little-end,
>> +        * we deal the data for bigend
>> +        */
>> +
>
> What if I am using a big endian kernel? Hint: you probably need to
> wrap these in CPU_LE()
>
>> +       REV64           V10.16B, V10.16B
>> +       REV64           V11.16B, V11.16B
>> +       REV64           V12.16B, V12.16B
>> +       REV64           V13.16B, V13.16B
>> +       EXT             V10.16B, V10.16B, V10.16B, #8
>> +       EXT             V11.16B, V11.16B, V11.16B, #8
>> +       EXT             V12.16B, V12.16B, V12.16B, #8
>> +       EXT             V13.16B, V13.16B, V13.16B, #8
>> +
>> +       EOR             V10.16B, V10.16B, V6.16B
>> +
>> +       SUB             X2, X2, #0x80
>> +       ADD             X5, X1, #0x20
>> +
>> +       /* deal data when the size of buffer bigger than 128 bytes */
>> +       /* _fold_64_B_loop */
>> +       LDR             Q6,=0xe658000000000000044c000000000000
>
> Could you move all these non-trivial constants to a separate location
> (after the end of the function), and name them?
>
>> +1:
>> +
>> +       LDP             Q16, Q17, [X1] ,#0x40
>> +       LDP             Q18, Q19, [X5], #0x40
>> +
>> +       /* carry-less multiply.
>> +        * V10 high-64bits carry-less multiply
>> +        * V6 high-64bits(PMULL2)
>> +        * V11 low-64bits carry-less multiply V6 low-64bits(PMULL)
>> +        */
>> +
>> +       PMULL2          V4.1Q, V10.2D, V6.2D
>> +       PMULL           V10.1Q, V10.1D, V6.1D
>> +       PMULL2          V5.1Q, V11.2D, V6.2D
>> +       PMULL           V11.1Q, V11.1D, V6.1D
>> +
>
> These instructions are only available if you have the PMULL extension,
> so this algorithm is not plain NEON.
>
>> +       REV64           V16.16B, V16.16B
>> +       REV64           V17.16B, V17.16B
>> +       REV64           V18.16B, V18.16B
>> +       REV64           V19.16B, V19.16B
>> +
>
> Endian swap on LE only?
>
>> +       PMULL2          V14.1Q, V12.2D, V6.2D
>> +       PMULL           V12.1Q, V12.1D, V6.1D
>> +       PMULL2          V15.1Q, V13.2D, V6.2D
>> +       PMULL           V13.1Q, V13.1D, V6.1D
>> +
>> +       EXT             V16.16B, V16.16B, V16.16B, #8
>> +       EOR             V10.16B, V10.16B, V4.16B
>> +
>> +       EXT             V17.16B, V17.16B, V17.16B, #8
>> +       EOR             V11.16B, V11.16B, V5.16B
>> +
>> +       EXT             V18.16B, V18.16B, V18.16B, #8
>> +       EOR             V12.16B, V12.16B, V14.16B
>> +
>> +       EXT             V19.16B, V19.16B, V19.16B, #8
>> +       EOR             V13.16B, V13.16B, V15.16B
>> +
>> +       SUB             X2, X2, #0x40
>> +
>> +
>> +       EOR             V10.16B, V10.16B, V16.16B
>> +       EOR             V11.16B, V11.16B, V17.16B
>> +
>> +       EOR             V12.16B, V12.16B, V18.16B
>> +       EOR             V13.16B, V13.16B, V19.16B
>> +
>> +       CMP             X2, #0x0
>> +       B.GE            1b                      // >=0
>> +
>> +       LDR             Q6, =0x06df0000000000002d56000000000000
>> +       MOV             V4.16B, V10.16B
>> +       /* V10 carry-less 0x06df000000000000([127:64]*[127:64]) */
>> +       PMULL           V4.1Q, V4.1D, V6.1D     //switch PMULL & PMULL2 order
>> +       PMULL2          V10.1Q, V10.2D, V6.2D
>> +       EOR             V11.16B, V11.16B, V4.16B
>> +       EOR             V11.16B, V11.16B, V10.16B
>> +
>> +       MOV             V4.16B, V11.16B
>> +       PMULL           V4.1Q, V4.1D, V6.1D     //switch PMULL & PMULL2 order
>> +       PMULL2          V11.1Q, V11.2D, V6.2D
>> +       EOR             V12.16B, V12.16B, V4.16B
>> +       EOR             V12.16B, V12.16B, V11.16B
>> +
>> +       MOV             V4.16B, V12.16B
>> +       PMULL           V4.1Q, V4.1D, V6.1D     //switch PMULL & PMULL2 order
>> +       PMULL2          V12.1Q, V12.2D, V6.2D
>> +       EOR             V13.16B, V13.16B, V4.16B
>> +       EOR             V13.16B, V13.16B, V12.16B
>> +
>> +       ADD             X2, X2, #48
>> +       CMP             X2, #0x0
>> +       B.LT            3f                      // _final_reduction_for_128, <0
>> +
>> +       /* _16B_reduction_loop */
>> +4:
>> +       /* unrelated load as early as possible*/
>> +       LDR             Q10, [X1], #0x10
>> +
>> +       MOV             V4.16B, V13.16B
>> +       PMULL2          V13.1Q, V13.2D, V6.2D
>> +       PMULL           V4.1Q, V4.1D, V6.1D
>> +       EOR             V13.16B, V13.16B, V4.16B
>> +
>> +       REV64           V10.16B, V10.16B
>> +       EXT             V10.16B, V10.16B, V10.16B, #8
>> +
>> +       EOR             V13.16B, V13.16B, V10.16B
>> +
>> +       SUB             X2, X2, #0x10
>> +       CMP             X2, #0x0
>> +       B.GE            4b                      // _16B_reduction_loop, >=0
>> +
>> +       /*  _final_reduction_for_128 */
>> +3:     ADD             X2, X2, #0x10
>> +       CMP             X2, #0x0
>> +       B.EQ            5f                      // _128_done, ==0
>> +
>> +       /* _get_last_two_xmms */
>
> Bogus comment. I guess you ported this code from x86, are you sure you
> don't need to credit the original author?
>
>> +6:     MOV             V12.16B, V13.16B
>> +       SUB             X1, X1, #0x10
>> +       ADD             X1, X1, X2
>> +       LDR             Q11, [X1], #0x10
>> +       REV64           V11.16B, V11.16B
>> +       EXT             V11.16B, V11.16B, V11.16B, #8
>> +
>> +       CMP             X2, #8
>> +       B.EQ            50f
>> +       B.LT            51f
>> +       B.GT            52f
>> +
>> +50:
>> +       /* dont use X register as temp one */
>> +       FMOV            D14, D12
>> +       MOVI            D12, #0
>> +       MOV             V12.D[1],V14.D[0]
>> +       B               53f
>> +51:
>> +       MOV             X9, #64
>> +       LSL             X13, X2, #3             // <<3 equal x8
>> +       SUB             X9, X9, X13
>> +       MOV             X5, V12.D[0]            // low 64-bit
>> +       MOV             X6, V12.D[1]            // high 64-bit
>> +       LSR             X10, X5, X9             // high bit of low 64-bit
>> +       LSL             X7, X5, X13
>> +       LSL             X8, X6, X13
>> +       ORR             X8, X8, X10             // combination of high 64-bit
>> +       MOV             V12.D[1], X8
>> +       MOV             V12.D[0], X7
>> +
>> +       B               53f
>> +52:
>> +       LSL             X13, X2, #3             // <<3 equal x8
>> +       SUB             X13, X13, #64
>> +
>> +       DUP             V18.2D, X13
>> +       FMOV            D16, D12
>> +       USHL            D16, D16, D18
>> +       EXT             V12.16B, V16.16B, V16.16B, #8
>> +
>> +53:
>> +       MOVI            D14, #0                 //add one zero constant
>> +
>> +       CMP             X2, #0
>> +       B.EQ    30f
>> +       CMP             X2, #1
>> +       B.EQ    31f
>> +       CMP             X2, #2
>> +       B.EQ    32f
>> +       CMP             X2, #3
>> +       B.EQ    33f
>> +       CMP             X2, #4
>> +       B.EQ    34f
>> +       CMP             X2, #5
>> +       B.EQ    35f
>> +       CMP             X2, #6
>> +       B.EQ    36f
>> +       CMP             X2, #7
>> +       B.EQ    37f
>> +       CMP             X2, #8
>> +       B.EQ    38f
>> +       CMP             X2, #9
>> +       B.EQ    39f
>> +       CMP             X2, #10
>> +       B.EQ    40f
>> +       CMP             X2, #11
>> +       B.EQ    41f
>> +       CMP             X2, #12
>> +       B.EQ    42f
>> +       CMP             X2, #13
>> +       B.EQ    43f
>> +       CMP             X2, #14
>> +       B.EQ    44f
>> +       CMP             X2, #15
>> +       B.EQ    45f
>> +
>
> This looks awful. If you make the snippets below a fixed size, you
> could use a computed goto instead
>
>> +       // >> 128bit
>> +30:
>> +       EOR             V13.16B, V13.16B, V13.16B
>> +       EOR             V8.16B, V8.16B, V8.16B
>> +       LDR             Q9,=0xffffffffffffffffffffffffffffffff
>
> Shouldn't you initialize q8 here as well.

Never mind, I just spotted the EOR which intializes it to 0x0

> And in general, couldn't you
> use some kind of shift to generate these constants (in all cases
> below)?
>> +       B               46f
>> +
>> +       // >> 120bit
>> +31:
>> +       USHR            V13.2D, V13.2D, #56
>> +       EXT             V13.16B, V13.16B, V14.16B, #8
>> +       LDR             Q8,=0xff
>> +       LDR             Q9,=0xffffffffffffffffffffffffffffff00
>> +       B               46f
>> +
>> +       // >> 112bit
>> +32:
>> +       USHR             V13.2D, V13.2D, #48
>> +       EXT             V13.16B, V13.16B, V14.16B, #8
>> +       LDR             Q8,=0xffff
>> +       LDR             Q9,=0xffffffffffffffffffffffffffff0000
>> +       B               46f
>> +
>> +       // >> 104bit
>> +33:
>> +       USHR            V13.2D, V13.2D, #40
>> +       EXT             V13.16B, V13.16B, V14.16B, #8
>> +       LDR             Q8,=0xffffff
>> +       LDR             Q9,=0xffffffffffffffffffffffffff000000
>> +       B               46f
>> +
>> +       // >> 96bit
>> +34:
>> +       USHR            V13.2D, V13.2D, #32
>> +       EXT             V13.16B, V13.16B, V14.16B, #8
>> +       LDR             Q8,=0xffffffff
>> +       LDR             Q9,=0xffffffffffffffffffffffff00000000
>> +       B               46f
>> +
>> +       // >> 88bit
>> +35:
>> +       USHR            V13.2D, V13.2D, #24
>> +       EXT             V13.16B, V13.16B, V14.16B, #8
>> +       LDR             Q8,=0xffffffffff
>> +       LDR             Q9,=0xffffffffffffffffffffff0000000000
>> +       B               46f
>> +
>> +       // >> 80bit
>> +36:
>> +       USHR            V13.2D, V13.2D, #16
>> +       EXT             V13.16B, V13.16B, V14.16B, #8
>> +       LDR             Q8,=0xffffffffffff
>> +       LDR             Q9,=0xffffffffffffffffffff000000000000
>> +       B               46f
>> +
>> +       // >> 72bit
>> +37:
>> +       USHR            V13.2D, V13.2D, #8
>> +       EXT             V13.16B, V13.16B, V14.16B, #8
>> +       LDR             Q8,=0xffffffffffffff
>> +       LDR             Q9,=0xffffffffffffffffff00000000000000
>> +       B               46f
>> +
>> +       // >> 64bit
>> +38:
>> +       EXT              V13.16B, V13.16B, V14.16B, #8
>> +       LDR             Q8,=0xffffffffffffffff
>> +       LDR             Q9,=0xffffffffffffffff0000000000000000
>> +       B               46f
>> +
>> +       // >> 56bit
>> +39:
>> +       EXT             V13.16B, V13.16B, V13.16B, #7
>> +       MOV             V13.S[3], V14.S[0]
>> +       MOV             V13.H[5], V14.H[0]
>> +       MOV             V13.B[9], V14.B[0]
>> +
>> +       LDR             Q8,=0xffffffffffffffffff
>> +       LDR             Q9,=0xffffffffffffff000000000000000000
>> +       B               46f
>> +
>> +       // >> 48bit
>> +40:
>> +       EXT             V13.16B, V13.16B, V13.16B, #6
>> +       MOV             V13.S[3], V14.S[0]
>> +       MOV             V13.H[5], V14.H[0]
>> +
>> +       LDR             Q8,=0xffffffffffffffffffff
>> +       LDR             Q9,=0xffffffffffff00000000000000000000
>> +       B               46f
>> +
>> +       // >> 40bit
>> +41:
>> +       EXT             V13.16B, V13.16B, V13.16B, #5
>> +       MOV             V13.S[3], V14.S[0]
>> +       MOV             V13.B[11], V14.B[0]
>> +
>> +       LDR             Q8,=0xffffffffffffffffffffff
>> +       LDR             Q9,=0xffffffffff0000000000000000000000
>> +       B               46f
>> +
>> +       // >> 32bit
>> +42:
>> +       EXT             V13.16B, V13.16B, V13.16B, #4
>> +       MOV             V13.S[3], V14.S[0]
>> +
>> +       LDR             Q8,=0xffffffffffffffffffffffff
>> +       LDR             Q9,=0xffffffff000000000000000000000000
>> +       B               46f
>> +
>> +       // >> 24bit
>> +43:
>> +       EXT             V13.16B, V13.16B, V13.16B, #3
>> +       MOV             V13.H[7], V14.H[0]
>> +       MOV             V13.B[13], V14.B[0]
>> +
>> +       LDR             Q8,=0xffffffffffffffffffffffffff
>> +       LDR             Q9,=0xffffff00000000000000000000000000
>> +       B               46f
>> +
>> +       // >> 16bit
>> +44:
>> +       EXT             V13.16B, V13.16B, V13.16B, #2
>> +       MOV             V13.H[7], V14.H[0]
>> +
>> +       LDR             Q8,=0xffffffffffffffffffffffffffff
>> +       LDR             Q9,=0xffff0000000000000000000000000000
>> +       B               46f
>> +
>> +       // >> 8bit
>> +45:
>> +       EXT              V13.16B, V13.16B, V13.16B, #1
>> +       MOV             V13.B[15], V14.B[0]
>> +
>> +       LDR             Q8,=0xffffffffffffffffffffffffffffff
>> +       LDR             Q9,=0xff000000000000000000000000000000
>> +
>> +       // backup V12 first
>> +       // pblendvb     xmm1, xmm2
>
> Another remnant of the x86 version, please remove
>
>> +46:
>> +       AND             V12.16B, V12.16B, V9.16B
>> +       AND             V11.16B, V11.16B, V8.16B
>> +       ORR             V11.16B, V11.16B, V12.16B
>> +
>> +       MOV             V12.16B, V11.16B
>> +       MOV             V4.16B, V13.16B
>> +       PMULL2          V13.1Q, V13.2D, V6.2D
>> +       PMULL           V4.1Q, V4.1D, V6.1D
>> +       EOR             V13.16B, V13.16B, V4.16B
>> +       EOR             V13.16B, V13.16B, V12.16B
>> +
>> +       /* _128_done. we change the Q6 D[0] and D[1] */
>> +5:     LDR             Q6, =0x2d560000000000001368000000000000
>> +       MOVI            D14, #0
>> +       MOV             V10.16B, V13.16B
>> +       PMULL2          V13.1Q, V13.2D, V6.2D
>> +
>> +       MOV             V10.D[1], V10.D[0]
>> +       MOV             V10.D[0], V14.D[0]    //set zero
>> +
>> +       EOR             V13.16B, V13.16B, V10.16B
>> +
>> +       MOV             V10.16B, V13.16B
>> +       LDR             Q7, =0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
>> +       AND             V10.16B, V10.16B, V7.16B
>> +
>> +       MOV             S13, V13.S[3]
>> +
>> +       PMULL           V13.1Q, V13.1D, V6.1D
>> +       EOR             V13.16B, V13.16B, V10.16B
>> +
>> +       /* _barrett */
>
> What does '_barrett' mean?
>
>> +7:     LDR             Q6, =0x00000001f65a57f8000000018bb70000
>> +       MOVI            D14, #0
>> +       MOV             V10.16B, V13.16B
>> +       PMULL2          V13.1Q, V13.2D, V6.2D
>> +
>> +       EXT             V13.16B, V13.16B, V13.16B, #12
>> +       MOV             V13.S[0], V14.S[0]
>> +
>> +       EXT             V6.16B, V6.16B, V6.16B, #8
>> +       PMULL2          V13.1Q, V13.2D, V6.2D
>> +
>> +       EXT             V13.16B, V13.16B, V13.16B, #12
>> +       MOV             V13.S[0], V14.S[0]
>> +
>> +       EOR             V13.16B, V13.16B, V10.16B
>> +       MOV             X0, V13.D[0]
>> +
>> +       /* _cleanup */
>> +8:     MOV             X14, #48
>> +       LSR             X0, X0, X14
>
> Why the temp x14?
>
>> +99:
>> +       ADD             sp, sp, #0x20
>> +
>> +       LDP             Q18, Q19, [sp], #0x20
>> +       LDP             Q16, Q17, [sp], #0x20
>> +       LDP             Q14, Q15, [sp], #0x20
>> +
>> +       LDP             Q8, Q9, [sp], #0x20
>> +       LDP             Q6, Q7, [sp], #0x20
>> +       LDP             Q4, Q5, [sp], #0x20
>> +       LDP             Q12, Q13, [sp], #0x20
>> +       LDP             Q10, Q11, [sp], #0x20
>> +       LDP             X13, X14, [sp], #0x10
>> +       LDP             X11, X12, [sp], #0x10
>> +       LDP             X9, X10, [sp], #0x10
>> +       LDP             X7, X8, [sp], #0x10
>> +       LDP             X5, X6, [sp], #0x10
>> +
>
> None of these registers need to be restored. The only thing you need
> (to mirror the prologue)
>
> ldp x29, x30, [sp], #xxx
> ret
>
> where xxx is the same value you used at the beginning.
>
>
>> +       RET
>> +
>> +       /* _less_than_128 */
>> +2:     CMP             X2, #32
>> +       B.LT            9f                              // _less_than_32
>> +       LDR             Q6, =0x06df0000000000002d56000000000000
>> +
>> +       LSL             X0, X0, #48
>> +       LDR             Q10, =0x0
>
> Please use movi here
>
>> +       MOV             V10.D[1], X0
>> +       LDR             Q13, [X1], #0x10
>> +       REV64           V13.16B, V13.16B
>> +       EXT             V13.16B, V13.16B, V13.16B, #8
>> +
>> +       EOR             V13.16B, V13.16B, V10.16B
>> +
>> +       SUB             X2, X2, #32
>> +       B               4b
>> +
>> +       /* _less_than_32 */
>> +9:     CMP             X2, #0
>> +       B.EQ            99b                     // _cleanup
>
> You can use CBZ here
>
>> +       LSL             X0, X0, #48
>> +       LDR             Q10,=0x0
>
> Please use movi here
>
>> +       MOV             V10.D[1], X0
>> +
>> +       CMP             X2, #16
>> +       B.EQ            10f                     // _exact_16_left
>> +       B.LE            11f                     // _less_than_16_left
>> +       LDR             Q13, [X1], #0x10
>> +
>> +       REV64           V13.16B, V13.16B
>> +       EXT             V13.16B, V13.16B, V13.16B, #8
>> +
>> +       EOR             V13.16B, V13.16B, V10.16B
>> +       SUB             X2, X2, #16
>> +       LDR             Q6, =0x06df0000000000002d56000000000000
>> +       B               6b                      // _get_last_two_xmms
>
> Another bogus comment
>
>> +
>> +       /*  _less_than_16_left */
>> +11:    CMP             X2, #4
>> +       B.LT            13f                     // _only_less_than_4
>> +
>> +       /* backup the length of data, we used in _less_than_2_left */
>> +       MOV             X8, X2
>> +       CMP             X2, #8
>> +       B.LT            14f                     // _less_than_8_left
>> +
>> +       LDR             X14, [X1], #8
>> +       /* push the data to stack, we backup the data to V10 */
>> +       STR             X14, [sp, #0]
>> +       SUB             X2, X2, #8
>> +       ADD             X11, X11, #8
>> +
>> +       /* _less_than_8_left */
>> +14:    CMP             X2, #4
>> +       B.LT            15f                     // _less_than_4_left
>> +
>> +       /* get 32bit data */
>> +       LDR             W5, [X1], #4
>> +
>> +       /* push the data to stack */
>> +       STR             W5, [sp, X11]
>> +       SUB             X2, X2, #4
>> +       ADD             X11, X11, #4
>> +
>> +       /* _less_than_4_left */
>> +15:    CMP             X2, #2
>> +       B.LT            16f                     // _less_than_2_left
>> +
>> +       /* get 16bits data */
>> +       LDRH            W6, [X1], #2
>> +
>> +       /* push the data to stack */
>> +       STRH            W6, [sp, X11]
>> +       SUB             X2, X2, #2
>> +       ADD             X11, X11, #2
>> +
>> +       /* _less_than_2_left */
>> +16:
>> +       /* get 8bits data */
>> +       LDRB            W7, [X1], #1
>> +       STRB            W7, [sp, X11]
>> +       ADD             X11, X11, #1
>> +
>> +       /* POP data from stack, store to V13 */
>> +       LDR             Q13, [sp]
>> +       MOVI            D14, #0
>> +       REV64           V13.16B, V13.16B
>> +       MOV             V8.16B, V13.16B
>> +       MOV             V13.D[1], V8.D[0]
>> +       MOV             V13.D[0], V8.D[1]
>> +
>> +       EOR             V13.16B, V13.16B, V10.16B
>> +       CMP             X8, #15
>> +       B.EQ    80f
>> +       CMP             X8, #14
>> +       B.EQ    81f
>> +       CMP             X8, #13
>> +       B.EQ    82f
>> +       CMP             X8, #12
>> +       B.EQ    83f
>> +       CMP             X8, #11
>> +       B.EQ    84f
>> +       CMP             X8, #10
>> +       B.EQ    85f
>> +       CMP             X8, #9
>> +       B.EQ    86f
>> +       CMP             X8, #8
>> +       B.EQ    87f
>> +       CMP             X8, #7
>> +       B.EQ    88f
>> +       CMP             X8, #6
>> +       B.EQ    89f
>> +       CMP             X8, #5
>> +       B.EQ    90f
>> +       CMP             X8, #4
>> +       B.EQ    91f
>> +       CMP             X8, #3
>> +       B.EQ    92f
>> +       CMP             X8, #2
>> +       B.EQ    93f
>> +       CMP             X8, #1
>> +       B.EQ    94f
>> +       CMP             X8, #0
>> +       B.EQ    95f
>> +
>
> Again, please use a computed goto instead
>
>> +80:
>> +       EXT             V13.16B, V13.16B, V13.16B, #1
>> +       MOV             V13.B[15], V14.B[0]
>> +       B               5b
>> +
>> +81:
>> +       EXT             V13.16B, V13.16B, V13.16B, #2
>> +       MOV              V13.H[7], V14.H[0]
>> +       B               5b
>> +
>> +82:
>> +       EXT             V13.16B, V13.16B, V13.16B, #3
>> +       MOV             V13.H[7], V14.H[0]
>> +       MOV             V13.B[13], V14.B[0]
>> +       B               5b
>> +83:
>> +
>> +       EXT             V13.16B, V13.16B, V13.16B, #4
>> +       MOV             V13.S[3], V14.S[0]
>> +       B               5b
>> +
>> +84:
>> +       EXT             V13.16B, V13.16B, V13.16B, #5
>> +       MOV             V13.S[3], V14.S[0]
>> +       MOV             V13.B[11], V14.B[0]
>> +       B               5b
>> +
>> +85:
>> +       EXT             V13.16B, V13.16B, V13.16B, #6
>> +       MOV             V13.S[3], V14.S[0]
>> +       MOV             V13.H[5], V14.H[0]
>> +       B               5b
>> +
>> +86:
>> +       EXT             V13.16B, V13.16B, V13.16B, #7
>> +       MOV             V13.S[3], V14.S[0]
>> +       MOV             V13.H[5], V14.H[0]
>> +       MOV             V13.B[9], V14.B[0]
>> +       B               5b
>> +
>> +87:
>> +       MOV             V13.D[0], V13.D[1]
>> +       MOV             V13.D[1], V14.D[0]
>> +       B               5b
>> +
>> +88:
>> +       EXT             V13.16B, V13.16B, V13.16B, #9
>> +       MOV             V13.D[1], V14.D[0]
>> +       MOV             V13.B[7], V14.B[0]
>> +       B               5b
>> +
>> +89:
>> +       EXT             V13.16B, V13.16B, V13.16B, #10
>> +       MOV             V13.D[1], V14.D[0]
>> +       MOV              V13.H[3], V14.H[0]
>> +       B               5b
>> +
>> +90:
>> +       EXT             V13.16B, V13.16B, V13.16B, #11
>> +       MOV             V13.D[1], V14.D[0]
>> +       MOV             V13.H[3], V14.H[0]
>> +       MOV             V13.B[5], V14.B[0]
>> +       B               5b
>> +
>> +91:
>> +       MOV             V13.S[0], V13.S[3]
>> +       MOV             V13.D[1], V14.D[0]
>> +       MOV             V13.S[1], V14.S[0]
>> +       B               5b
>> +
>> +92:
>> +       EXT             V13.16B, V13.16B, V13.16B, #13
>> +       MOV             V13.D[1], V14.D[0]
>> +       MOV             V13.S[1], V14.S[0]
>> +       MOV             V13.B[3], V14.B[0]
>> +       B               5b
>> +
>> +93:
>> +       MOV             V15.H[0], V13.H[7]
>> +       MOV             V13.16B, V14.16B
>> +       MOV             V13.H[0], V15.H[0]
>> +       B               5b
>> +
>> +94:
>> +       MOV             V15.B[0], V13.B[15]
>> +       MOV             V13.16B, V14.16B
>> +       MOV             V13.B[0], V15.B[0]
>> +       B               5b
>> +
>> +95:
>> +       LDR             Q13,=0x0
>
> movi
>
>> +       B               5b                              // _128_done
>> +
>> +       /* _exact_16_left */
>> +10:
>> +       LD1             { V13.2D }, [X1], #0x10
>> +
>> +       REV64           V13.16B, V13.16B
>> +       EXT             V13.16B, V13.16B, V13.16B, #8
>> +       EOR             V13.16B, V13.16B, V10.16B
>> +       B               5b                              // _128_done
>> +
>> +       /* _only_less_than_4 */
>> +13:    CMP             X2, #3
>> +       MOVI            D14, #0
>> +       B.LT            17f                             //_only_less_than_3
>> +
>> +       LDR             S13, [X1], #4
>> +       MOV              V13.B[15], V13.B[0]
>> +       MOV             V13.B[14], V13.B[1]
>> +       MOV             V13.B[13], V13.B[2]
>> +       MOV             V13.S[0], V13.S[1]
>> +
>> +       EOR             V13.16B, V13.16B, V10.16B
>> +
>> +       EXT             V13.16B, V13.16B, V13.16B, #5
>> +
>> +       MOV             V13.S[3], V14.S[0]
>> +       MOV             V13.B[11], V14.B[0]
>> +
>> +       B               7b                              // _barrett
>> +       /* _only_less_than_3 */
>> +17:
>> +       CMP             X2, #2
>> +       B.LT            18f                             // _only_less_than_2
>> +
>> +       LDR             H13, [X1], #2
>> +       MOV             V13.B[15], V13.B[0]
>> +       MOV             V13.B[14], V13.B[1]
>> +       MOV             V13.H[0], V13.H[1]
>> +
>> +       EOR             V13.16B, V13.16B, V10.16B
>> +
>> +       EXT             V13.16B, V13.16B, V13.16B, #6
>> +       MOV             V13.S[3], V14.S[0]
>> +       MOV             V13.H[5], V14.H[0]
>> +
>> +       B               7b                              // _barrett
>> +
>> +       /* _only_less_than_2 */
>> +18:
>> +       LDRB            W7, [X1], #1
>> +       LDR             Q13, = 0x0
>> +       MOV             V13.B[15], W7
>> +
>> +       EOR             V13.16B, V13.16B, V10.16B
>> +
>> +       EXT             V13.16B, V13.16B, V13.16B, #7
>> +       MOV             V13.S[3], V14.S[0]
>> +       MOV             V13.H[5], V14.H[0]
>> +       MOV             V13.B[9], V14.B[0]
>> +
>> +       B               7b                              // _barrett
>
> Please end with ENDPROC()
>
>> diff --git a/arch/arm64/crypto/crct10dif-neon_glue.c b/arch/arm64/crypto/crct10dif-neon_glue.c
>> new file mode 100644
>> index 0000000..a6d8c5c
>> --- /dev/null
>> +++ b/arch/arm64/crypto/crct10dif-neon_glue.c
>> @@ -0,0 +1,115 @@
>> +/*
>> + * Copyright (c) 2016-2017 Hisilicon Limited.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + */
>> +
>> +
>> +#include <linux/types.h>
>> +#include <linux/module.h>
>> +#include <linux/crc-t10dif.h>
>> +#include <crypto/internal/hash.h>
>> +#include <linux/init.h>
>> +#include <linux/string.h>
>> +#include <linux/kernel.h>
>> +
>> +asmlinkage __u16 crc_t10dif_neon(__u16 crc, const unsigned char *buf,
>> +                               size_t len);
>> +
>> +struct chksum_desc_ctx {
>> +       __u16 crc;
>> +};
>> +
>> +/*
>> + * Steps through buffer one byte at at time, calculates reflected
>> + * crc using table.
>> + */
>> +
>
> Where is the table?
>
>> +static int chksum_init(struct shash_desc *desc)
>> +{
>> +       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
>> +
>> +       ctx->crc = 0;
>> +
>> +       return 0;
>> +}
>> +
>> +static int chksum_update(struct shash_desc *desc, const u8 *data,
>> +                        unsigned int length)
>> +{
>> +       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
>> +
>> +       ctx->crc = crc_t10dif_neon(ctx->crc, data, length);
>
> You need kernel_neon_begin/kernel_neon_end here
>
>> +       return 0;
>> +}
>> +
>> +static int chksum_final(struct shash_desc *desc, u8 *out)
>> +{
>> +       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
>> +
>> +       *(__u16 *)out = ctx->crc;
>> +       return 0;
>> +}
>> +
>> +static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
>> +                       u8 *out)
>> +{
>> +       *(__u16 *)out = crc_t10dif_neon(*crcp, data, len);
>
> ... and here
>
>> +       return 0;
>> +}
>> +
>> +static int chksum_finup(struct shash_desc *desc, const u8 *data,
>> +                       unsigned int len, u8 *out)
>> +{
>> +       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
>> +
>> +       return __chksum_finup(&ctx->crc, data, len, out);
>> +}
>> +
>> +static int chksum_digest(struct shash_desc *desc, const u8 *data,
>> +                        unsigned int length, u8 *out)
>> +{
>> +       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
>> +
>> +       return __chksum_finup(&ctx->crc, data, length, out);
>> +}
>> +
>> +static struct shash_alg alg = {
>> +       .digestsize             =       CRC_T10DIF_DIGEST_SIZE,
>> +       .init           =       chksum_init,
>> +       .update         =       chksum_update,
>> +       .final          =       chksum_final,
>> +       .finup          =       chksum_finup,
>> +       .digest         =       chksum_digest,
>> +       .descsize               =       sizeof(struct chksum_desc_ctx),
>> +       .base                   =       {
>> +               .cra_name               =       "crct10dif",
>> +               .cra_driver_name        =       "crct10dif-neon",
>> +               .cra_priority           =       200,
>> +               .cra_blocksize          =       CRC_T10DIF_BLOCK_SIZE,
>> +               .cra_module             =       THIS_MODULE,
>
>
> Please align the = characters here, and add only a single space after
>
> Note that you can do
>
> .base.cra_name = xxx,
> .base.xxx
>
> as well.
>
>> +       }
>> +};
>> +
>> +static int __init crct10dif_arm64_mod_init(void)
>> +{
>> +       return crypto_register_shash(&alg);
>
> You need to check here if your CPU has support for the 64x64->128
> PMULL instruction.
>
>> +}
>> +
>> +static void __exit crct10dif_arm64_mod_fini(void)
>> +{
>> +       crypto_unregister_shash(&alg);
>> +}
>> +
>> +module_init(crct10dif_arm64_mod_init);
>> +module_exit(crct10dif_arm64_mod_fini);
>> +
>> +MODULE_AUTHOR("YueHaibing <yuehaibing@huawei.com>");
>> +MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with ARM64 NEON instruction.");
>> +MODULE_LICENSE("GPL");
>> +
>> +MODULE_ALIAS_CRYPTO("crct10dif");
>> +MODULE_ALIAS_CRYPTO("crct10dif-neon");
>> --
>> 2.7.0
>>
>>
>>
>> _______________________________________________
>> linux-arm-kernel mailing list
>> linux-arm-kernel@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH v3] arm64/crypto: Accelerated CRC T10 DIF computation
From: Ard Biesheuvel @ 2016-11-22 12:53 UTC (permalink / raw)
  To: YueHaibing
  Cc: Herbert Xu, David S. Miller, Catalin Marinas, Will Deacon,
	linux-kernel@vger.kernel.org, Hanjun Guo, dingtinahong,
	yangshengkai, linux-arm-kernel@lists.infradead.org,
	linux-crypto@vger.kernel.org
In-Reply-To: <20161122101455.5312-1-yuehaibing@huawei.com>

On 22 November 2016 at 10:14, YueHaibing <yuehaibing@huawei.com> wrote:
> This is the ARM64 CRC T10 DIF transform accelerated with the ARMv8
> NEON instruction.The config CRYPTO_CRCT10DIF_NEON should be turned
> on to enable the feature.The crc_t10dif crypto library function will
> use this faster algorithm when crct10dif_neon module is loaded.
>

What is this algorithm commonly used for? In other words, why is it a
good idea to add support for this algorithm to the kernel?

> Tcrypt benchmark results:
>
> HIP06  (mode=320 sec=2)
>
> The ratio of bytes/sec crct10dif-neon Vs. crct10dif-generic:
>
>                         TEST                              neon          generic         ratio
>   16 byte blocks,   16 bytes per update,   1 updates    214506112       171095400       1.25
>   64 byte blocks,   16 bytes per update,   4 updates    139385312       119036352       1.17
>   64 byte blocks,   64 bytes per update,   1 updates    671523712       198945344       3.38
>  256 byte blocks,   16 bytes per update,  16 updates    157674880       125146752       1.26
>  256 byte blocks,   64 bytes per update,   4 updates    491888128       175764096       2.80
>  256 byte blocks,  256 bytes per update,   1 updates    2123298176      206995200       10.26
> 1024 byte blocks,   16 bytes per update,  64 updates    161243136       126460416       1.28
> 1024 byte blocks,  256 bytes per update,   4 updates    1643020800      200027136       8.21
> 1024 byte blocks, 1024 bytes per update,   1 updates    4238239232      209106432       20.27
> 2048 byte blocks,   16 bytes per update, 128 updates    162079744       126953472       1.28
> 2048 byte blocks,  256 bytes per update,   8 updates    1693587456      200867840       8.43
> 2048 byte blocks, 1024 bytes per update,   2 updates    3424323584      206330880       16.60
> 2048 byte blocks, 2048 bytes per update,   1 updates    5228207104      208620544       25.06
> 4096 byte blocks,   16 bytes per update, 256 updates    162304000       126894080       1.28
> 4096 byte blocks,  256 bytes per update,  16 updates    1731862528      201197568       8.61
> 4096 byte blocks, 1024 bytes per update,   4 updates    3668625408      207003648       17.72
> 4096 byte blocks, 4096 bytes per update,   1 updates    5551239168      209127424       26.54
> 8192 byte blocks,   16 bytes per update, 512 updates    162779136       126984192       1.28
> 8192 byte blocks,  256 bytes per update,  32 updates    1753702400      201420800       8.71
> 8192 byte blocks, 1024 bytes per update,   8 updates    3760918528      207351808       18.14
> 8192 byte blocks, 4096 bytes per update,   2 updates    5483655168      208928768       26.25
> 8192 byte blocks, 8192 bytes per update,   1 updates    5623377920      209108992       26.89
>
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
> Signed-off-by: YangShengkai <yangshengkai@huawei.com>
> Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
> Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
>
> ---
>  arch/arm64/crypto/Kconfig                 |   5 +
>  arch/arm64/crypto/Makefile                |   4 +
>  arch/arm64/crypto/crct10dif-neon-asm_64.S | 751 ++++++++++++++++++++++++++++++
>  arch/arm64/crypto/crct10dif-neon_glue.c   | 115 +++++
>  4 files changed, 875 insertions(+)
>  create mode 100644 arch/arm64/crypto/crct10dif-neon-asm_64.S
>  create mode 100644 arch/arm64/crypto/crct10dif-neon_glue.c
>
> diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
> index 2cf32e9..2e450bf 100644
> --- a/arch/arm64/crypto/Kconfig
> +++ b/arch/arm64/crypto/Kconfig
> @@ -23,6 +23,11 @@ config CRYPTO_GHASH_ARM64_CE
>         depends on ARM64 && KERNEL_MODE_NEON
>         select CRYPTO_HASH
>
> +config CRYPTO_CRCT10DIF_NEON
> +       tristate "CRCT10DIF hardware acceleration using NEON instructions"
> +       depends on ARM64 && KERNEL_MODE_NEON
> +       select CRYPTO_HASH
> +

Could you please follow the existing pattern:

config CRYPTO_CRCT10DIF_ARM64_NEON


>  config CRYPTO_AES_ARM64_CE
>         tristate "AES core cipher using ARMv8 Crypto Extensions"
>         depends on ARM64 && KERNEL_MODE_NEON
> diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
> index abb79b3..6c9ff2c 100644
> --- a/arch/arm64/crypto/Makefile
> +++ b/arch/arm64/crypto/Makefile
> @@ -29,6 +29,10 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o
>  obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
>  aes-neon-blk-y := aes-glue-neon.o aes-neon.o
>
> +obj-$(CONFIG_CRYPTO_CRCT10DIF_NEON) += crct10dif-neon.o
> +crct10dif-neon-y := crct10dif-neon-asm_64.o crct10dif-neon_glue.o
> +AFLAGS_crct10dif-neon-asm_64.o := -march=armv8-a+crypto
> +

Please drop this line, and add

.cpu generic+crypto

to the .S file

>  AFLAGS_aes-ce.o                := -DINTERLEAVE=4
>  AFLAGS_aes-neon.o      := -DINTERLEAVE=4
>
> diff --git a/arch/arm64/crypto/crct10dif-neon-asm_64.S b/arch/arm64/crypto/crct10dif-neon-asm_64.S
> new file mode 100644
> index 0000000..2ae3033
> --- /dev/null
> +++ b/arch/arm64/crypto/crct10dif-neon-asm_64.S
> @@ -0,0 +1,751 @@
> +/*
> + * Copyright (c) 2016-2017 Hisilicon Limited.
> + *

Please drop the 2017 here.

> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +#include <linux/linkage.h>
> +#include <asm/assembler.h>
> +
> +.global crc_t10dif_neon

Please drop this .global, and use ENTRY() below

> +.text
> +
> +/* X0 is initial CRC value
> + * X1 is data buffer
> + * X2 is the length of buffer
> + * X3 is the backup buffer(for extend)
> + * X4 for other extend parameter(for extend)
> + * Q0, Q1, Q2, Q3 maybe as parameter for other functions,
> + * the value of Q0, Q1, Q2, Q3 maybe modified.
> + *
> + * suggestion:
> + * 1. dont use general purpose register for calculation
> + * 2. set data endianness outside of the kernel
> + * 3. use ext as shifting around
> + * 4. dont use LD3/LD4, ST3/ST4
> + */
> +


Whose suggestions are these, and why? I do appreciate comments like
this, but only if I can learn something from it

> +crc_t10dif_neon:

ENTRY()

> +       /* push the register to stack that CRC16 will use */
> +       STP             X5, X6, [sp, #-0x10]!

Please use an ordinary stack frame, i.e.,

stp   x29, x30, [sp, #-xxx]!
mov x29, sp

where xxx is the entire allocation you need for stacking callee save registers

> +       STP             X7, X8, [sp, #-0x10]!
> +       STP             X9, X10, [sp, #-0x10]!
> +       STP             X11, X12, [sp, #-0x10]!
> +       STP             X13, X14, [sp, #-0x10]!

These are not callee save, so no need to stack them

> +       STP             Q10, Q11, [sp, #-0x20]!
> +       STP             Q12, Q13, [sp, #-0x20]!
> +       STP             Q4, Q5, [sp, #-0x20]!
> +       STP             Q6, Q7, [sp, #-0x20]!
> +       STP             Q8, Q9, [sp, #-0x20]!
> +       STP             Q14, Q15, [sp, #-0x20]!
> +       STP             Q16, Q17, [sp, #-0x20]!
> +       STP             Q18, Q19, [sp, #-0x20]!
> +

What is the point of stacking the NEON registers? Also, as a general
note, could you switch to lower case throughout the file?


> +       SUB             sp,sp,#0x20
> +

Please account for locals in the allocation above. Only outgoing
arguments should be allocated below the frame pointer


> +       MOV             X11, #0         // PUSH STACK FLAG
> +

What does this comment mean?

> +       CMP             X2, #0x80
> +       B.LT            2f              // _less_than_128, <128
> +

Redundant comment

> +       /* V10/V11/V12/V13      is 128bit.
> +        * we get data 512bit( by cacheline ) each time
> +        */
> +       LDP             Q10, Q11, [X1], #0x20
> +       LDP             Q12, Q13, [X1], #0x20
> +
> +       /* move the initial value to V6 register */
> +       LSL             X0, X0, #48
> +       EOR             V6.16B, V6.16B, V6.16B
> +       MOV             V6.D[1], X0
> +
> +       /* big-little end change. because the data in memory is little-end,
> +        * we deal the data for bigend
> +        */
> +

What if I am using a big endian kernel? Hint: you probably need to
wrap these in CPU_LE()

> +       REV64           V10.16B, V10.16B
> +       REV64           V11.16B, V11.16B
> +       REV64           V12.16B, V12.16B
> +       REV64           V13.16B, V13.16B
> +       EXT             V10.16B, V10.16B, V10.16B, #8
> +       EXT             V11.16B, V11.16B, V11.16B, #8
> +       EXT             V12.16B, V12.16B, V12.16B, #8
> +       EXT             V13.16B, V13.16B, V13.16B, #8
> +
> +       EOR             V10.16B, V10.16B, V6.16B
> +
> +       SUB             X2, X2, #0x80
> +       ADD             X5, X1, #0x20
> +
> +       /* deal data when the size of buffer bigger than 128 bytes */
> +       /* _fold_64_B_loop */
> +       LDR             Q6,=0xe658000000000000044c000000000000

Could you move all these non-trivial constants to a separate location
(after the end of the function), and name them?

> +1:
> +
> +       LDP             Q16, Q17, [X1] ,#0x40
> +       LDP             Q18, Q19, [X5], #0x40
> +
> +       /* carry-less multiply.
> +        * V10 high-64bits carry-less multiply
> +        * V6 high-64bits(PMULL2)
> +        * V11 low-64bits carry-less multiply V6 low-64bits(PMULL)
> +        */
> +
> +       PMULL2          V4.1Q, V10.2D, V6.2D
> +       PMULL           V10.1Q, V10.1D, V6.1D
> +       PMULL2          V5.1Q, V11.2D, V6.2D
> +       PMULL           V11.1Q, V11.1D, V6.1D
> +

These instructions are only available if you have the PMULL extension,
so this algorithm is not plain NEON.

> +       REV64           V16.16B, V16.16B
> +       REV64           V17.16B, V17.16B
> +       REV64           V18.16B, V18.16B
> +       REV64           V19.16B, V19.16B
> +

Endian swap on LE only?

> +       PMULL2          V14.1Q, V12.2D, V6.2D
> +       PMULL           V12.1Q, V12.1D, V6.1D
> +       PMULL2          V15.1Q, V13.2D, V6.2D
> +       PMULL           V13.1Q, V13.1D, V6.1D
> +
> +       EXT             V16.16B, V16.16B, V16.16B, #8
> +       EOR             V10.16B, V10.16B, V4.16B
> +
> +       EXT             V17.16B, V17.16B, V17.16B, #8
> +       EOR             V11.16B, V11.16B, V5.16B
> +
> +       EXT             V18.16B, V18.16B, V18.16B, #8
> +       EOR             V12.16B, V12.16B, V14.16B
> +
> +       EXT             V19.16B, V19.16B, V19.16B, #8
> +       EOR             V13.16B, V13.16B, V15.16B
> +
> +       SUB             X2, X2, #0x40
> +
> +
> +       EOR             V10.16B, V10.16B, V16.16B
> +       EOR             V11.16B, V11.16B, V17.16B
> +
> +       EOR             V12.16B, V12.16B, V18.16B
> +       EOR             V13.16B, V13.16B, V19.16B
> +
> +       CMP             X2, #0x0
> +       B.GE            1b                      // >=0
> +
> +       LDR             Q6, =0x06df0000000000002d56000000000000
> +       MOV             V4.16B, V10.16B
> +       /* V10 carry-less 0x06df000000000000([127:64]*[127:64]) */
> +       PMULL           V4.1Q, V4.1D, V6.1D     //switch PMULL & PMULL2 order
> +       PMULL2          V10.1Q, V10.2D, V6.2D
> +       EOR             V11.16B, V11.16B, V4.16B
> +       EOR             V11.16B, V11.16B, V10.16B
> +
> +       MOV             V4.16B, V11.16B
> +       PMULL           V4.1Q, V4.1D, V6.1D     //switch PMULL & PMULL2 order
> +       PMULL2          V11.1Q, V11.2D, V6.2D
> +       EOR             V12.16B, V12.16B, V4.16B
> +       EOR             V12.16B, V12.16B, V11.16B
> +
> +       MOV             V4.16B, V12.16B
> +       PMULL           V4.1Q, V4.1D, V6.1D     //switch PMULL & PMULL2 order
> +       PMULL2          V12.1Q, V12.2D, V6.2D
> +       EOR             V13.16B, V13.16B, V4.16B
> +       EOR             V13.16B, V13.16B, V12.16B
> +
> +       ADD             X2, X2, #48
> +       CMP             X2, #0x0
> +       B.LT            3f                      // _final_reduction_for_128, <0
> +
> +       /* _16B_reduction_loop */
> +4:
> +       /* unrelated load as early as possible*/
> +       LDR             Q10, [X1], #0x10
> +
> +       MOV             V4.16B, V13.16B
> +       PMULL2          V13.1Q, V13.2D, V6.2D
> +       PMULL           V4.1Q, V4.1D, V6.1D
> +       EOR             V13.16B, V13.16B, V4.16B
> +
> +       REV64           V10.16B, V10.16B
> +       EXT             V10.16B, V10.16B, V10.16B, #8
> +
> +       EOR             V13.16B, V13.16B, V10.16B
> +
> +       SUB             X2, X2, #0x10
> +       CMP             X2, #0x0
> +       B.GE            4b                      // _16B_reduction_loop, >=0
> +
> +       /*  _final_reduction_for_128 */
> +3:     ADD             X2, X2, #0x10
> +       CMP             X2, #0x0
> +       B.EQ            5f                      // _128_done, ==0
> +
> +       /* _get_last_two_xmms */

Bogus comment. I guess you ported this code from x86, are you sure you
don't need to credit the original author?

> +6:     MOV             V12.16B, V13.16B
> +       SUB             X1, X1, #0x10
> +       ADD             X1, X1, X2
> +       LDR             Q11, [X1], #0x10
> +       REV64           V11.16B, V11.16B
> +       EXT             V11.16B, V11.16B, V11.16B, #8
> +
> +       CMP             X2, #8
> +       B.EQ            50f
> +       B.LT            51f
> +       B.GT            52f
> +
> +50:
> +       /* dont use X register as temp one */
> +       FMOV            D14, D12
> +       MOVI            D12, #0
> +       MOV             V12.D[1],V14.D[0]
> +       B               53f
> +51:
> +       MOV             X9, #64
> +       LSL             X13, X2, #3             // <<3 equal x8
> +       SUB             X9, X9, X13
> +       MOV             X5, V12.D[0]            // low 64-bit
> +       MOV             X6, V12.D[1]            // high 64-bit
> +       LSR             X10, X5, X9             // high bit of low 64-bit
> +       LSL             X7, X5, X13
> +       LSL             X8, X6, X13
> +       ORR             X8, X8, X10             // combination of high 64-bit
> +       MOV             V12.D[1], X8
> +       MOV             V12.D[0], X7
> +
> +       B               53f
> +52:
> +       LSL             X13, X2, #3             // <<3 equal x8
> +       SUB             X13, X13, #64
> +
> +       DUP             V18.2D, X13
> +       FMOV            D16, D12
> +       USHL            D16, D16, D18
> +       EXT             V12.16B, V16.16B, V16.16B, #8
> +
> +53:
> +       MOVI            D14, #0                 //add one zero constant
> +
> +       CMP             X2, #0
> +       B.EQ    30f
> +       CMP             X2, #1
> +       B.EQ    31f
> +       CMP             X2, #2
> +       B.EQ    32f
> +       CMP             X2, #3
> +       B.EQ    33f
> +       CMP             X2, #4
> +       B.EQ    34f
> +       CMP             X2, #5
> +       B.EQ    35f
> +       CMP             X2, #6
> +       B.EQ    36f
> +       CMP             X2, #7
> +       B.EQ    37f
> +       CMP             X2, #8
> +       B.EQ    38f
> +       CMP             X2, #9
> +       B.EQ    39f
> +       CMP             X2, #10
> +       B.EQ    40f
> +       CMP             X2, #11
> +       B.EQ    41f
> +       CMP             X2, #12
> +       B.EQ    42f
> +       CMP             X2, #13
> +       B.EQ    43f
> +       CMP             X2, #14
> +       B.EQ    44f
> +       CMP             X2, #15
> +       B.EQ    45f
> +

This looks awful. If you make the snippets below a fixed size, you
could use a computed goto instead

> +       // >> 128bit
> +30:
> +       EOR             V13.16B, V13.16B, V13.16B
> +       EOR             V8.16B, V8.16B, V8.16B
> +       LDR             Q9,=0xffffffffffffffffffffffffffffffff

Shouldn't you initialize q8 here as well. And in general, couldn't you
use some kind of shift to generate these constants (in all cases
below)?
> +       B               46f
> +
> +       // >> 120bit
> +31:
> +       USHR            V13.2D, V13.2D, #56
> +       EXT             V13.16B, V13.16B, V14.16B, #8
> +       LDR             Q8,=0xff
> +       LDR             Q9,=0xffffffffffffffffffffffffffffff00
> +       B               46f
> +
> +       // >> 112bit
> +32:
> +       USHR             V13.2D, V13.2D, #48
> +       EXT             V13.16B, V13.16B, V14.16B, #8
> +       LDR             Q8,=0xffff
> +       LDR             Q9,=0xffffffffffffffffffffffffffff0000
> +       B               46f
> +
> +       // >> 104bit
> +33:
> +       USHR            V13.2D, V13.2D, #40
> +       EXT             V13.16B, V13.16B, V14.16B, #8
> +       LDR             Q8,=0xffffff
> +       LDR             Q9,=0xffffffffffffffffffffffffff000000
> +       B               46f
> +
> +       // >> 96bit
> +34:
> +       USHR            V13.2D, V13.2D, #32
> +       EXT             V13.16B, V13.16B, V14.16B, #8
> +       LDR             Q8,=0xffffffff
> +       LDR             Q9,=0xffffffffffffffffffffffff00000000
> +       B               46f
> +
> +       // >> 88bit
> +35:
> +       USHR            V13.2D, V13.2D, #24
> +       EXT             V13.16B, V13.16B, V14.16B, #8
> +       LDR             Q8,=0xffffffffff
> +       LDR             Q9,=0xffffffffffffffffffffff0000000000
> +       B               46f
> +
> +       // >> 80bit
> +36:
> +       USHR            V13.2D, V13.2D, #16
> +       EXT             V13.16B, V13.16B, V14.16B, #8
> +       LDR             Q8,=0xffffffffffff
> +       LDR             Q9,=0xffffffffffffffffffff000000000000
> +       B               46f
> +
> +       // >> 72bit
> +37:
> +       USHR            V13.2D, V13.2D, #8
> +       EXT             V13.16B, V13.16B, V14.16B, #8
> +       LDR             Q8,=0xffffffffffffff
> +       LDR             Q9,=0xffffffffffffffffff00000000000000
> +       B               46f
> +
> +       // >> 64bit
> +38:
> +       EXT              V13.16B, V13.16B, V14.16B, #8
> +       LDR             Q8,=0xffffffffffffffff
> +       LDR             Q9,=0xffffffffffffffff0000000000000000
> +       B               46f
> +
> +       // >> 56bit
> +39:
> +       EXT             V13.16B, V13.16B, V13.16B, #7
> +       MOV             V13.S[3], V14.S[0]
> +       MOV             V13.H[5], V14.H[0]
> +       MOV             V13.B[9], V14.B[0]
> +
> +       LDR             Q8,=0xffffffffffffffffff
> +       LDR             Q9,=0xffffffffffffff000000000000000000
> +       B               46f
> +
> +       // >> 48bit
> +40:
> +       EXT             V13.16B, V13.16B, V13.16B, #6
> +       MOV             V13.S[3], V14.S[0]
> +       MOV             V13.H[5], V14.H[0]
> +
> +       LDR             Q8,=0xffffffffffffffffffff
> +       LDR             Q9,=0xffffffffffff00000000000000000000
> +       B               46f
> +
> +       // >> 40bit
> +41:
> +       EXT             V13.16B, V13.16B, V13.16B, #5
> +       MOV             V13.S[3], V14.S[0]
> +       MOV             V13.B[11], V14.B[0]
> +
> +       LDR             Q8,=0xffffffffffffffffffffff
> +       LDR             Q9,=0xffffffffff0000000000000000000000
> +       B               46f
> +
> +       // >> 32bit
> +42:
> +       EXT             V13.16B, V13.16B, V13.16B, #4
> +       MOV             V13.S[3], V14.S[0]
> +
> +       LDR             Q8,=0xffffffffffffffffffffffff
> +       LDR             Q9,=0xffffffff000000000000000000000000
> +       B               46f
> +
> +       // >> 24bit
> +43:
> +       EXT             V13.16B, V13.16B, V13.16B, #3
> +       MOV             V13.H[7], V14.H[0]
> +       MOV             V13.B[13], V14.B[0]
> +
> +       LDR             Q8,=0xffffffffffffffffffffffffff
> +       LDR             Q9,=0xffffff00000000000000000000000000
> +       B               46f
> +
> +       // >> 16bit
> +44:
> +       EXT             V13.16B, V13.16B, V13.16B, #2
> +       MOV             V13.H[7], V14.H[0]
> +
> +       LDR             Q8,=0xffffffffffffffffffffffffffff
> +       LDR             Q9,=0xffff0000000000000000000000000000
> +       B               46f
> +
> +       // >> 8bit
> +45:
> +       EXT              V13.16B, V13.16B, V13.16B, #1
> +       MOV             V13.B[15], V14.B[0]
> +
> +       LDR             Q8,=0xffffffffffffffffffffffffffffff
> +       LDR             Q9,=0xff000000000000000000000000000000
> +
> +       // backup V12 first
> +       // pblendvb     xmm1, xmm2

Another remnant of the x86 version, please remove

> +46:
> +       AND             V12.16B, V12.16B, V9.16B
> +       AND             V11.16B, V11.16B, V8.16B
> +       ORR             V11.16B, V11.16B, V12.16B
> +
> +       MOV             V12.16B, V11.16B
> +       MOV             V4.16B, V13.16B
> +       PMULL2          V13.1Q, V13.2D, V6.2D
> +       PMULL           V4.1Q, V4.1D, V6.1D
> +       EOR             V13.16B, V13.16B, V4.16B
> +       EOR             V13.16B, V13.16B, V12.16B
> +
> +       /* _128_done. we change the Q6 D[0] and D[1] */
> +5:     LDR             Q6, =0x2d560000000000001368000000000000
> +       MOVI            D14, #0
> +       MOV             V10.16B, V13.16B
> +       PMULL2          V13.1Q, V13.2D, V6.2D
> +
> +       MOV             V10.D[1], V10.D[0]
> +       MOV             V10.D[0], V14.D[0]    //set zero
> +
> +       EOR             V13.16B, V13.16B, V10.16B
> +
> +       MOV             V10.16B, V13.16B
> +       LDR             Q7, =0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
> +       AND             V10.16B, V10.16B, V7.16B
> +
> +       MOV             S13, V13.S[3]
> +
> +       PMULL           V13.1Q, V13.1D, V6.1D
> +       EOR             V13.16B, V13.16B, V10.16B
> +
> +       /* _barrett */

What does '_barrett' mean?

> +7:     LDR             Q6, =0x00000001f65a57f8000000018bb70000
> +       MOVI            D14, #0
> +       MOV             V10.16B, V13.16B
> +       PMULL2          V13.1Q, V13.2D, V6.2D
> +
> +       EXT             V13.16B, V13.16B, V13.16B, #12
> +       MOV             V13.S[0], V14.S[0]
> +
> +       EXT             V6.16B, V6.16B, V6.16B, #8
> +       PMULL2          V13.1Q, V13.2D, V6.2D
> +
> +       EXT             V13.16B, V13.16B, V13.16B, #12
> +       MOV             V13.S[0], V14.S[0]
> +
> +       EOR             V13.16B, V13.16B, V10.16B
> +       MOV             X0, V13.D[0]
> +
> +       /* _cleanup */
> +8:     MOV             X14, #48
> +       LSR             X0, X0, X14

Why the temp x14?

> +99:
> +       ADD             sp, sp, #0x20
> +
> +       LDP             Q18, Q19, [sp], #0x20
> +       LDP             Q16, Q17, [sp], #0x20
> +       LDP             Q14, Q15, [sp], #0x20
> +
> +       LDP             Q8, Q9, [sp], #0x20
> +       LDP             Q6, Q7, [sp], #0x20
> +       LDP             Q4, Q5, [sp], #0x20
> +       LDP             Q12, Q13, [sp], #0x20
> +       LDP             Q10, Q11, [sp], #0x20
> +       LDP             X13, X14, [sp], #0x10
> +       LDP             X11, X12, [sp], #0x10
> +       LDP             X9, X10, [sp], #0x10
> +       LDP             X7, X8, [sp], #0x10
> +       LDP             X5, X6, [sp], #0x10
> +

None of these registers need to be restored. The only thing you need
(to mirror the prologue)

ldp x29, x30, [sp], #xxx
ret

where xxx is the same value you used at the beginning.


> +       RET
> +
> +       /* _less_than_128 */
> +2:     CMP             X2, #32
> +       B.LT            9f                              // _less_than_32
> +       LDR             Q6, =0x06df0000000000002d56000000000000
> +
> +       LSL             X0, X0, #48
> +       LDR             Q10, =0x0

Please use movi here

> +       MOV             V10.D[1], X0
> +       LDR             Q13, [X1], #0x10
> +       REV64           V13.16B, V13.16B
> +       EXT             V13.16B, V13.16B, V13.16B, #8
> +
> +       EOR             V13.16B, V13.16B, V10.16B
> +
> +       SUB             X2, X2, #32
> +       B               4b
> +
> +       /* _less_than_32 */
> +9:     CMP             X2, #0
> +       B.EQ            99b                     // _cleanup

You can use CBZ here

> +       LSL             X0, X0, #48
> +       LDR             Q10,=0x0

Please use movi here

> +       MOV             V10.D[1], X0
> +
> +       CMP             X2, #16
> +       B.EQ            10f                     // _exact_16_left
> +       B.LE            11f                     // _less_than_16_left
> +       LDR             Q13, [X1], #0x10
> +
> +       REV64           V13.16B, V13.16B
> +       EXT             V13.16B, V13.16B, V13.16B, #8
> +
> +       EOR             V13.16B, V13.16B, V10.16B
> +       SUB             X2, X2, #16
> +       LDR             Q6, =0x06df0000000000002d56000000000000
> +       B               6b                      // _get_last_two_xmms

Another bogus comment

> +
> +       /*  _less_than_16_left */
> +11:    CMP             X2, #4
> +       B.LT            13f                     // _only_less_than_4
> +
> +       /* backup the length of data, we used in _less_than_2_left */
> +       MOV             X8, X2
> +       CMP             X2, #8
> +       B.LT            14f                     // _less_than_8_left
> +
> +       LDR             X14, [X1], #8
> +       /* push the data to stack, we backup the data to V10 */
> +       STR             X14, [sp, #0]
> +       SUB             X2, X2, #8
> +       ADD             X11, X11, #8
> +
> +       /* _less_than_8_left */
> +14:    CMP             X2, #4
> +       B.LT            15f                     // _less_than_4_left
> +
> +       /* get 32bit data */
> +       LDR             W5, [X1], #4
> +
> +       /* push the data to stack */
> +       STR             W5, [sp, X11]
> +       SUB             X2, X2, #4
> +       ADD             X11, X11, #4
> +
> +       /* _less_than_4_left */
> +15:    CMP             X2, #2
> +       B.LT            16f                     // _less_than_2_left
> +
> +       /* get 16bits data */
> +       LDRH            W6, [X1], #2
> +
> +       /* push the data to stack */
> +       STRH            W6, [sp, X11]
> +       SUB             X2, X2, #2
> +       ADD             X11, X11, #2
> +
> +       /* _less_than_2_left */
> +16:
> +       /* get 8bits data */
> +       LDRB            W7, [X1], #1
> +       STRB            W7, [sp, X11]
> +       ADD             X11, X11, #1
> +
> +       /* POP data from stack, store to V13 */
> +       LDR             Q13, [sp]
> +       MOVI            D14, #0
> +       REV64           V13.16B, V13.16B
> +       MOV             V8.16B, V13.16B
> +       MOV             V13.D[1], V8.D[0]
> +       MOV             V13.D[0], V8.D[1]
> +
> +       EOR             V13.16B, V13.16B, V10.16B
> +       CMP             X8, #15
> +       B.EQ    80f
> +       CMP             X8, #14
> +       B.EQ    81f
> +       CMP             X8, #13
> +       B.EQ    82f
> +       CMP             X8, #12
> +       B.EQ    83f
> +       CMP             X8, #11
> +       B.EQ    84f
> +       CMP             X8, #10
> +       B.EQ    85f
> +       CMP             X8, #9
> +       B.EQ    86f
> +       CMP             X8, #8
> +       B.EQ    87f
> +       CMP             X8, #7
> +       B.EQ    88f
> +       CMP             X8, #6
> +       B.EQ    89f
> +       CMP             X8, #5
> +       B.EQ    90f
> +       CMP             X8, #4
> +       B.EQ    91f
> +       CMP             X8, #3
> +       B.EQ    92f
> +       CMP             X8, #2
> +       B.EQ    93f
> +       CMP             X8, #1
> +       B.EQ    94f
> +       CMP             X8, #0
> +       B.EQ    95f
> +

Again, please use a computed goto instead

> +80:
> +       EXT             V13.16B, V13.16B, V13.16B, #1
> +       MOV             V13.B[15], V14.B[0]
> +       B               5b
> +
> +81:
> +       EXT             V13.16B, V13.16B, V13.16B, #2
> +       MOV              V13.H[7], V14.H[0]
> +       B               5b
> +
> +82:
> +       EXT             V13.16B, V13.16B, V13.16B, #3
> +       MOV             V13.H[7], V14.H[0]
> +       MOV             V13.B[13], V14.B[0]
> +       B               5b
> +83:
> +
> +       EXT             V13.16B, V13.16B, V13.16B, #4
> +       MOV             V13.S[3], V14.S[0]
> +       B               5b
> +
> +84:
> +       EXT             V13.16B, V13.16B, V13.16B, #5
> +       MOV             V13.S[3], V14.S[0]
> +       MOV             V13.B[11], V14.B[0]
> +       B               5b
> +
> +85:
> +       EXT             V13.16B, V13.16B, V13.16B, #6
> +       MOV             V13.S[3], V14.S[0]
> +       MOV             V13.H[5], V14.H[0]
> +       B               5b
> +
> +86:
> +       EXT             V13.16B, V13.16B, V13.16B, #7
> +       MOV             V13.S[3], V14.S[0]
> +       MOV             V13.H[5], V14.H[0]
> +       MOV             V13.B[9], V14.B[0]
> +       B               5b
> +
> +87:
> +       MOV             V13.D[0], V13.D[1]
> +       MOV             V13.D[1], V14.D[0]
> +       B               5b
> +
> +88:
> +       EXT             V13.16B, V13.16B, V13.16B, #9
> +       MOV             V13.D[1], V14.D[0]
> +       MOV             V13.B[7], V14.B[0]
> +       B               5b
> +
> +89:
> +       EXT             V13.16B, V13.16B, V13.16B, #10
> +       MOV             V13.D[1], V14.D[0]
> +       MOV              V13.H[3], V14.H[0]
> +       B               5b
> +
> +90:
> +       EXT             V13.16B, V13.16B, V13.16B, #11
> +       MOV             V13.D[1], V14.D[0]
> +       MOV             V13.H[3], V14.H[0]
> +       MOV             V13.B[5], V14.B[0]
> +       B               5b
> +
> +91:
> +       MOV             V13.S[0], V13.S[3]
> +       MOV             V13.D[1], V14.D[0]
> +       MOV             V13.S[1], V14.S[0]
> +       B               5b
> +
> +92:
> +       EXT             V13.16B, V13.16B, V13.16B, #13
> +       MOV             V13.D[1], V14.D[0]
> +       MOV             V13.S[1], V14.S[0]
> +       MOV             V13.B[3], V14.B[0]
> +       B               5b
> +
> +93:
> +       MOV             V15.H[0], V13.H[7]
> +       MOV             V13.16B, V14.16B
> +       MOV             V13.H[0], V15.H[0]
> +       B               5b
> +
> +94:
> +       MOV             V15.B[0], V13.B[15]
> +       MOV             V13.16B, V14.16B
> +       MOV             V13.B[0], V15.B[0]
> +       B               5b
> +
> +95:
> +       LDR             Q13,=0x0

movi

> +       B               5b                              // _128_done
> +
> +       /* _exact_16_left */
> +10:
> +       LD1             { V13.2D }, [X1], #0x10
> +
> +       REV64           V13.16B, V13.16B
> +       EXT             V13.16B, V13.16B, V13.16B, #8
> +       EOR             V13.16B, V13.16B, V10.16B
> +       B               5b                              // _128_done
> +
> +       /* _only_less_than_4 */
> +13:    CMP             X2, #3
> +       MOVI            D14, #0
> +       B.LT            17f                             //_only_less_than_3
> +
> +       LDR             S13, [X1], #4
> +       MOV              V13.B[15], V13.B[0]
> +       MOV             V13.B[14], V13.B[1]
> +       MOV             V13.B[13], V13.B[2]
> +       MOV             V13.S[0], V13.S[1]
> +
> +       EOR             V13.16B, V13.16B, V10.16B
> +
> +       EXT             V13.16B, V13.16B, V13.16B, #5
> +
> +       MOV             V13.S[3], V14.S[0]
> +       MOV             V13.B[11], V14.B[0]
> +
> +       B               7b                              // _barrett
> +       /* _only_less_than_3 */
> +17:
> +       CMP             X2, #2
> +       B.LT            18f                             // _only_less_than_2
> +
> +       LDR             H13, [X1], #2
> +       MOV             V13.B[15], V13.B[0]
> +       MOV             V13.B[14], V13.B[1]
> +       MOV             V13.H[0], V13.H[1]
> +
> +       EOR             V13.16B, V13.16B, V10.16B
> +
> +       EXT             V13.16B, V13.16B, V13.16B, #6
> +       MOV             V13.S[3], V14.S[0]
> +       MOV             V13.H[5], V14.H[0]
> +
> +       B               7b                              // _barrett
> +
> +       /* _only_less_than_2 */
> +18:
> +       LDRB            W7, [X1], #1
> +       LDR             Q13, = 0x0
> +       MOV             V13.B[15], W7
> +
> +       EOR             V13.16B, V13.16B, V10.16B
> +
> +       EXT             V13.16B, V13.16B, V13.16B, #7
> +       MOV             V13.S[3], V14.S[0]
> +       MOV             V13.H[5], V14.H[0]
> +       MOV             V13.B[9], V14.B[0]
> +
> +       B               7b                              // _barrett

Please end with ENDPROC()

> diff --git a/arch/arm64/crypto/crct10dif-neon_glue.c b/arch/arm64/crypto/crct10dif-neon_glue.c
> new file mode 100644
> index 0000000..a6d8c5c
> --- /dev/null
> +++ b/arch/arm64/crypto/crct10dif-neon_glue.c
> @@ -0,0 +1,115 @@
> +/*
> + * Copyright (c) 2016-2017 Hisilicon Limited.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +
> +#include <linux/types.h>
> +#include <linux/module.h>
> +#include <linux/crc-t10dif.h>
> +#include <crypto/internal/hash.h>
> +#include <linux/init.h>
> +#include <linux/string.h>
> +#include <linux/kernel.h>
> +
> +asmlinkage __u16 crc_t10dif_neon(__u16 crc, const unsigned char *buf,
> +                               size_t len);
> +
> +struct chksum_desc_ctx {
> +       __u16 crc;
> +};
> +
> +/*
> + * Steps through buffer one byte at at time, calculates reflected
> + * crc using table.
> + */
> +

Where is the table?

> +static int chksum_init(struct shash_desc *desc)
> +{
> +       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> +
> +       ctx->crc = 0;
> +
> +       return 0;
> +}
> +
> +static int chksum_update(struct shash_desc *desc, const u8 *data,
> +                        unsigned int length)
> +{
> +       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> +
> +       ctx->crc = crc_t10dif_neon(ctx->crc, data, length);

You need kernel_neon_begin/kernel_neon_end here

> +       return 0;
> +}
> +
> +static int chksum_final(struct shash_desc *desc, u8 *out)
> +{
> +       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> +
> +       *(__u16 *)out = ctx->crc;
> +       return 0;
> +}
> +
> +static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
> +                       u8 *out)
> +{
> +       *(__u16 *)out = crc_t10dif_neon(*crcp, data, len);

... and here

> +       return 0;
> +}
> +
> +static int chksum_finup(struct shash_desc *desc, const u8 *data,
> +                       unsigned int len, u8 *out)
> +{
> +       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> +
> +       return __chksum_finup(&ctx->crc, data, len, out);
> +}
> +
> +static int chksum_digest(struct shash_desc *desc, const u8 *data,
> +                        unsigned int length, u8 *out)
> +{
> +       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
> +
> +       return __chksum_finup(&ctx->crc, data, length, out);
> +}
> +
> +static struct shash_alg alg = {
> +       .digestsize             =       CRC_T10DIF_DIGEST_SIZE,
> +       .init           =       chksum_init,
> +       .update         =       chksum_update,
> +       .final          =       chksum_final,
> +       .finup          =       chksum_finup,
> +       .digest         =       chksum_digest,
> +       .descsize               =       sizeof(struct chksum_desc_ctx),
> +       .base                   =       {
> +               .cra_name               =       "crct10dif",
> +               .cra_driver_name        =       "crct10dif-neon",
> +               .cra_priority           =       200,
> +               .cra_blocksize          =       CRC_T10DIF_BLOCK_SIZE,
> +               .cra_module             =       THIS_MODULE,


Please align the = characters here, and add only a single space after

Note that you can do

.base.cra_name = xxx,
.base.xxx

as well.

> +       }
> +};
> +
> +static int __init crct10dif_arm64_mod_init(void)
> +{
> +       return crypto_register_shash(&alg);

You need to check here if your CPU has support for the 64x64->128
PMULL instruction.

> +}
> +
> +static void __exit crct10dif_arm64_mod_fini(void)
> +{
> +       crypto_unregister_shash(&alg);
> +}
> +
> +module_init(crct10dif_arm64_mod_init);
> +module_exit(crct10dif_arm64_mod_fini);
> +
> +MODULE_AUTHOR("YueHaibing <yuehaibing@huawei.com>");
> +MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with ARM64 NEON instruction.");
> +MODULE_LICENSE("GPL");
> +
> +MODULE_ALIAS_CRYPTO("crct10dif");
> +MODULE_ALIAS_CRYPTO("crct10dif-neon");
> --
> 2.7.0
>
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* [v3 PATCH 16/16] crypto: aesbs - Convert to skcipher
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch converts aesbs over to the skcipher interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 arch/arm/crypto/aesbs-glue.c |  380 +++++++++++++++++--------------------------
 1 file changed, 152 insertions(+), 228 deletions(-)

diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c
index 0511a6c..f5eafce 100644
--- a/arch/arm/crypto/aesbs-glue.c
+++ b/arch/arm/crypto/aesbs-glue.c
@@ -10,8 +10,9 @@
 
 #include <asm/neon.h>
 #include <crypto/aes.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
+#include <crypto/cbc.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 #include <crypto/xts.h>
 
@@ -55,14 +56,14 @@ struct aesbs_xts_ctx {
 	struct AES_KEY	twkey;
 };
 
-static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int aesbs_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			     unsigned int key_len)
 {
-	struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int bits = key_len * 8;
 
 	if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	ctx->dec.rk = ctx->enc;
@@ -71,33 +72,33 @@ static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int aesbs_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			     unsigned int key_len)
 {
-	struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int bits = key_len * 8;
 
 	if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	ctx->enc.converted = 0;
 	return 0;
 }
 
-static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int aesbs_xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			     unsigned int key_len)
 {
-	struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int bits = key_len * 4;
 	int err;
 
-	err = xts_check_key(tfm, in_key, key_len);
+	err = xts_verify_key(tfm, in_key, key_len);
 	if (err)
 		return err;
 
 	if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 	ctx->dec.rk = ctx->enc.rk;
@@ -107,88 +108,52 @@ static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static int aesbs_cbc_encrypt(struct blkcipher_desc *desc,
-			     struct scatterlist *dst,
-			     struct scatterlist *src, unsigned int nbytes)
+static inline void aesbs_encrypt_one(struct crypto_skcipher *tfm,
+				     const u8 *src, u8 *dst)
 {
-	struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-	int err;
+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	AES_encrypt(src, dst, &ctx->dec.rk);
+}
 
-	while (walk.nbytes) {
-		u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
-		u8 *src = walk.src.virt.addr;
+static int aesbs_cbc_encrypt(struct skcipher_request *req)
+{
+	return crypto_cbc_encrypt_walk(req, aesbs_encrypt_one);
+}
 
-		if (walk.dst.virt.addr == walk.src.virt.addr) {
-			u8 *iv = walk.iv;
-
-			do {
-				crypto_xor(src, iv, AES_BLOCK_SIZE);
-				AES_encrypt(src, src, &ctx->enc);
-				iv = src;
-				src += AES_BLOCK_SIZE;
-			} while (--blocks);
-			memcpy(walk.iv, iv, AES_BLOCK_SIZE);
-		} else {
-			u8 *dst = walk.dst.virt.addr;
-
-			do {
-				crypto_xor(walk.iv, src, AES_BLOCK_SIZE);
-				AES_encrypt(walk.iv, dst, &ctx->enc);
-				memcpy(walk.iv, dst, AES_BLOCK_SIZE);
-				src += AES_BLOCK_SIZE;
-				dst += AES_BLOCK_SIZE;
-			} while (--blocks);
-		}
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
-	}
-	return err;
+static inline void aesbs_decrypt_one(struct crypto_skcipher *tfm,
+				     const u8 *src, u8 *dst)
+{
+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	AES_decrypt(src, dst, &ctx->dec.rk);
 }
 
-static int aesbs_cbc_decrypt(struct blkcipher_desc *desc,
-			     struct scatterlist *dst,
-			     struct scatterlist *src, unsigned int nbytes)
+static int aesbs_cbc_decrypt(struct skcipher_request *req)
 {
-	struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
-
-	while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) {
-		kernel_neon_begin();
-		bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
-				  walk.nbytes, &ctx->dec, walk.iv);
-		kernel_neon_end();
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
-	}
-	while (walk.nbytes) {
-		u32 blocks = walk.nbytes / AES_BLOCK_SIZE;
+	for (err = skcipher_walk_virt(&walk, req, false);
+	     (nbytes = walk.nbytes); err = skcipher_walk_done(&walk, nbytes)) {
+		u32 blocks = nbytes / AES_BLOCK_SIZE;
 		u8 *dst = walk.dst.virt.addr;
 		u8 *src = walk.src.virt.addr;
-		u8 bk[2][AES_BLOCK_SIZE];
 		u8 *iv = walk.iv;
 
-		do {
-			if (walk.dst.virt.addr == walk.src.virt.addr)
-				memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE);
-
-			AES_decrypt(src, dst, &ctx->dec.rk);
-			crypto_xor(dst, iv, AES_BLOCK_SIZE);
-
-			if (walk.dst.virt.addr == walk.src.virt.addr)
-				iv = bk[blocks & 1];
-			else
-				iv = src;
+		if (blocks >= 8) {
+			kernel_neon_begin();
+			bsaes_cbc_encrypt(src, dst, nbytes, &ctx->dec, iv);
+			kernel_neon_end();
+			nbytes %= AES_BLOCK_SIZE;
+			continue;
+		}
 
-			dst += AES_BLOCK_SIZE;
-			src += AES_BLOCK_SIZE;
-		} while (--blocks);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		nbytes = crypto_cbc_decrypt_blocks(&walk, tfm,
+						   aesbs_decrypt_one);
 	}
 	return err;
 }
@@ -206,17 +171,15 @@ static void inc_be128_ctr(__be32 ctr[], u32 addend)
 	}
 }
 
-static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
-			     struct scatterlist *dst, struct scatterlist *src,
-			     unsigned int nbytes)
+static int aesbs_ctr_encrypt(struct skcipher_request *req)
 {
-	struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	u32 blocks;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
 		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
@@ -235,11 +198,7 @@ static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
 		kernel_neon_end();
 		inc_be128_ctr(ctr, blocks);
 
-		nbytes -= blocks * AES_BLOCK_SIZE;
-		if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE)
-			break;
-
-		err = blkcipher_walk_done(desc, &walk, tail);
+		err = skcipher_walk_done(&walk, tail);
 	}
 	if (walk.nbytes) {
 		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
@@ -248,23 +207,21 @@ static int aesbs_ctr_encrypt(struct blkcipher_desc *desc,
 
 		AES_encrypt(walk.iv, ks, &ctx->enc.rk);
 		if (tdst != tsrc)
-			memcpy(tdst, tsrc, nbytes);
-		crypto_xor(tdst, ks, nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
+			memcpy(tdst, tsrc, walk.nbytes);
+		crypto_xor(tdst, ks, walk.nbytes);
+		err = skcipher_walk_done(&walk, 0);
 	}
 	return err;
 }
 
-static int aesbs_xts_encrypt(struct blkcipher_desc *desc,
-			     struct scatterlist *dst,
-			     struct scatterlist *src, unsigned int nbytes)
+static int aesbs_xts_encrypt(struct skcipher_request *req)
 {
-	struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	/* generate the initial tweak */
 	AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
@@ -274,21 +231,19 @@ static int aesbs_xts_encrypt(struct blkcipher_desc *desc,
 		bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr,
 				  walk.nbytes, &ctx->enc, walk.iv);
 		kernel_neon_end();
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	return err;
 }
 
-static int aesbs_xts_decrypt(struct blkcipher_desc *desc,
-			     struct scatterlist *dst,
-			     struct scatterlist *src, unsigned int nbytes)
+static int aesbs_xts_decrypt(struct skcipher_request *req)
 {
-	struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	/* generate the initial tweak */
 	AES_encrypt(walk.iv, walk.iv, &ctx->twkey);
@@ -298,141 +253,110 @@ static int aesbs_xts_decrypt(struct blkcipher_desc *desc,
 		bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr,
 				  walk.nbytes, &ctx->dec, walk.iv);
 		kernel_neon_end();
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	return err;
 }
 
-static struct crypto_alg aesbs_algs[] = { {
-	.cra_name		= "__cbc-aes-neonbs",
-	.cra_driver_name	= "__driver-cbc-aes-neonbs",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct aesbs_cbc_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= aesbs_cbc_set_key,
-		.encrypt	= aesbs_cbc_encrypt,
-		.decrypt	= aesbs_cbc_decrypt,
+static struct skcipher_alg aesbs_algs[] = { {
+	.base = {
+		.cra_name		= "__cbc(aes)",
+		.cra_driver_name	= "__cbc-aes-neonbs",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct aesbs_cbc_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= aesbs_cbc_set_key,
+	.encrypt	= aesbs_cbc_encrypt,
+	.decrypt	= aesbs_cbc_decrypt,
 }, {
-	.cra_name		= "__ctr-aes-neonbs",
-	.cra_driver_name	= "__driver-ctr-aes-neonbs",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct aesbs_ctr_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= aesbs_ctr_set_key,
-		.encrypt	= aesbs_ctr_encrypt,
-		.decrypt	= aesbs_ctr_encrypt,
+	.base = {
+		.cra_name		= "__ctr(aes)",
+		.cra_driver_name	= "__ctr-aes-neonbs",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct aesbs_ctr_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
+	.setkey		= aesbs_ctr_set_key,
+	.encrypt	= aesbs_ctr_encrypt,
+	.decrypt	= aesbs_ctr_encrypt,
 }, {
-	.cra_name		= "__xts-aes-neonbs",
-	.cra_driver_name	= "__driver-xts-aes-neonbs",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct aesbs_xts_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= aesbs_xts_set_key,
-		.encrypt	= aesbs_xts_encrypt,
-		.decrypt	= aesbs_xts_decrypt,
+	.base = {
+		.cra_name		= "__xts(aes)",
+		.cra_driver_name	= "__xts-aes-neonbs",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct aesbs_xts_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
-}, {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "cbc-aes-neonbs",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= __ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "ctr-aes-neonbs",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "xts(aes)",
-	.cra_driver_name	= "xts-aes-neonbs",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
+	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+	.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= aesbs_xts_set_key,
+	.encrypt	= aesbs_xts_encrypt,
+	.decrypt	= aesbs_xts_decrypt,
 } };
 
+struct simd_skcipher_alg *aesbs_simd_algs[ARRAY_SIZE(aesbs_algs)];
+
+static void aesbs_mod_exit(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(aesbs_simd_algs) && aesbs_simd_algs[i]; i++)
+		simd_skcipher_free(aesbs_simd_algs[i]);
+
+	crypto_unregister_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+}
+
 static int __init aesbs_mod_init(void)
 {
+	struct simd_skcipher_alg *simd;
+	const char *basename;
+	const char *algname;
+	const char *drvname;
+	int err;
+	int i;
+
 	if (!cpu_has_neon())
 		return -ENODEV;
 
-	return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
-}
+	err = crypto_register_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+	if (err)
+		return err;
 
-static void __exit aesbs_mod_exit(void)
-{
-	crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs));
+	for (i = 0; i < ARRAY_SIZE(aesbs_algs); i++) {
+		algname = aesbs_algs[i].base.cra_name + 2;
+		drvname = aesbs_algs[i].base.cra_driver_name + 2;
+		basename = aesbs_algs[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto unregister_simds;
+
+		aesbs_simd_algs[i] = simd;
+	}
+
+	return 0;
+
+unregister_simds:
+	aesbs_mod_exit();
+	return err;
 }
 
 module_init(aesbs_mod_init);

^ permalink raw reply related

* [v3 PATCH 15/16] crypto: cbc - Export CBC implementation
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch moves the core CBC implementation into a header file
so that it can be reused by drivers implementing CBC.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 crypto/cbc.c         |  129 ---------------------------------------------
 include/crypto/cbc.h |  146 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 148 insertions(+), 127 deletions(-)

diff --git a/crypto/cbc.c b/crypto/cbc.c
index 6539b38..68f751a 100644
--- a/crypto/cbc.c
+++ b/crypto/cbc.c
@@ -1,7 +1,7 @@
 /*
  * CBC: Cipher Block Chaining mode
  *
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2006-2016 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -10,6 +10,7 @@
  *
  */
 
+#include <crypto/cbc.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
@@ -38,71 +39,6 @@ static int crypto_cbc_setkey(struct crypto_skcipher *parent, const u8 *key,
 	return err;
 }
 
-static inline int crypto_cbc_encrypt_segment(
-	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
-	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
-{
-	unsigned int bsize = crypto_skcipher_blocksize(tfm);
-	unsigned int nbytes = walk->nbytes;
-	u8 *src = walk->src.virt.addr;
-	u8 *dst = walk->dst.virt.addr;
-	u8 *iv = walk->iv;
-
-	do {
-		crypto_xor(iv, src, bsize);
-		fn(tfm, iv, dst);
-		memcpy(iv, dst, bsize);
-
-		src += bsize;
-		dst += bsize;
-	} while ((nbytes -= bsize) >= bsize);
-
-	return nbytes;
-}
-
-static inline int crypto_cbc_encrypt_inplace(
-	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
-	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
-{
-	unsigned int bsize = crypto_skcipher_blocksize(tfm);
-	unsigned int nbytes = walk->nbytes;
-	u8 *src = walk->src.virt.addr;
-	u8 *iv = walk->iv;
-
-	do {
-		crypto_xor(src, iv, bsize);
-		fn(tfm, src, src);
-		iv = src;
-
-		src += bsize;
-	} while ((nbytes -= bsize) >= bsize);
-
-	memcpy(walk->iv, iv, bsize);
-
-	return nbytes;
-}
-
-static inline int crypto_cbc_encrypt_walk(struct skcipher_request *req,
-					  void (*fn)(struct crypto_skcipher *,
-						     const u8 *, u8 *))
-{
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct skcipher_walk walk;
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, false);
-
-	while (walk.nbytes) {
-		if (walk.src.virt.addr == walk.dst.virt.addr)
-			err = crypto_cbc_encrypt_inplace(&walk, tfm, fn);
-		else
-			err = crypto_cbc_encrypt_segment(&walk, tfm, fn);
-		err = skcipher_walk_done(&walk, err);
-	}
-
-	return err;
-}
-
 static inline void crypto_cbc_encrypt_one(struct crypto_skcipher *tfm,
 					  const u8 *src, u8 *dst)
 {
@@ -116,67 +52,6 @@ static int crypto_cbc_encrypt(struct skcipher_request *req)
 	return crypto_cbc_encrypt_walk(req, crypto_cbc_encrypt_one);
 }
 
-static inline int crypto_cbc_decrypt_segment(
-	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
-	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
-{
-	unsigned int bsize = crypto_skcipher_blocksize(tfm);
-	unsigned int nbytes = walk->nbytes;
-	u8 *src = walk->src.virt.addr;
-	u8 *dst = walk->dst.virt.addr;
-	u8 *iv = walk->iv;
-
-	do {
-		fn(tfm, src, dst);
-		crypto_xor(dst, iv, bsize);
-		iv = src;
-
-		src += bsize;
-		dst += bsize;
-	} while ((nbytes -= bsize) >= bsize);
-
-	memcpy(walk->iv, iv, bsize);
-
-	return nbytes;
-}
-
-static inline int crypto_cbc_decrypt_inplace(
-	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
-	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
-{
-	unsigned int bsize = crypto_skcipher_blocksize(tfm);
-	unsigned int nbytes = walk->nbytes;
-	u8 *src = walk->src.virt.addr;
-	u8 last_iv[bsize];
-
-	/* Start of the last block. */
-	src += nbytes - (nbytes & (bsize - 1)) - bsize;
-	memcpy(last_iv, src, bsize);
-
-	for (;;) {
-		fn(tfm, src, src);
-		if ((nbytes -= bsize) < bsize)
-			break;
-		crypto_xor(src, src - bsize, bsize);
-		src -= bsize;
-	}
-
-	crypto_xor(src, walk->iv, bsize);
-	memcpy(walk->iv, last_iv, bsize);
-
-	return nbytes;
-}
-
-static inline int crypto_cbc_decrypt_blocks(
-	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
-	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
-{
-	if (walk->src.virt.addr == walk->dst.virt.addr)
-		return crypto_cbc_decrypt_inplace(walk, tfm, fn);
-	else
-		return crypto_cbc_decrypt_segment(walk, tfm, fn);
-}
-
 static inline void crypto_cbc_decrypt_one(struct crypto_skcipher *tfm,
 					  const u8 *src, u8 *dst)
 {
diff --git a/include/crypto/cbc.h b/include/crypto/cbc.h
new file mode 100644
index 0000000..f5b8bfc
--- /dev/null
+++ b/include/crypto/cbc.h
@@ -0,0 +1,146 @@
+/*
+ * CBC: Cipher Block Chaining mode
+ *
+ * Copyright (c) 2016 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_CBC_H
+#define _CRYPTO_CBC_H
+
+#include <crypto/internal/skcipher.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+static inline int crypto_cbc_encrypt_segment(
+	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
+{
+	unsigned int bsize = crypto_skcipher_blocksize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		crypto_xor(iv, src, bsize);
+		fn(tfm, iv, dst);
+		memcpy(iv, dst, bsize);
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	return nbytes;
+}
+
+static inline int crypto_cbc_encrypt_inplace(
+	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
+{
+	unsigned int bsize = crypto_skcipher_blocksize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		crypto_xor(src, iv, bsize);
+		fn(tfm, src, src);
+		iv = src;
+
+		src += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);
+
+	return nbytes;
+}
+
+static inline int crypto_cbc_encrypt_walk(struct skcipher_request *req,
+					  void (*fn)(struct crypto_skcipher *,
+						     const u8 *, u8 *))
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes) {
+		if (walk.src.virt.addr == walk.dst.virt.addr)
+			err = crypto_cbc_encrypt_inplace(&walk, tfm, fn);
+		else
+			err = crypto_cbc_encrypt_segment(&walk, tfm, fn);
+		err = skcipher_walk_done(&walk, err);
+	}
+
+	return err;
+}
+
+static inline int crypto_cbc_decrypt_segment(
+	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
+{
+	unsigned int bsize = crypto_skcipher_blocksize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	u8 *iv = walk->iv;
+
+	do {
+		fn(tfm, src, dst);
+		crypto_xor(dst, iv, bsize);
+		iv = src;
+
+		src += bsize;
+		dst += bsize;
+	} while ((nbytes -= bsize) >= bsize);
+
+	memcpy(walk->iv, iv, bsize);
+
+	return nbytes;
+}
+
+static inline int crypto_cbc_decrypt_inplace(
+	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
+{
+	unsigned int bsize = crypto_skcipher_blocksize(tfm);
+	unsigned int nbytes = walk->nbytes;
+	u8 *src = walk->src.virt.addr;
+	u8 last_iv[bsize];
+
+	/* Start of the last block. */
+	src += nbytes - (nbytes & (bsize - 1)) - bsize;
+	memcpy(last_iv, src, bsize);
+
+	for (;;) {
+		fn(tfm, src, src);
+		if ((nbytes -= bsize) < bsize)
+			break;
+		crypto_xor(src, src - bsize, bsize);
+		src -= bsize;
+	}
+
+	crypto_xor(src, walk->iv, bsize);
+	memcpy(walk->iv, last_iv, bsize);
+
+	return nbytes;
+}
+
+static inline int crypto_cbc_decrypt_blocks(
+	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
+{
+	if (walk->src.virt.addr == walk->dst.virt.addr)
+		return crypto_cbc_decrypt_inplace(walk, tfm, fn);
+	else
+		return crypto_cbc_decrypt_segment(walk, tfm, fn);
+}
+
+#endif	/* _CRYPTO_CBC_H */

^ permalink raw reply related

* [v3 PATCH 14/16] crypto: cbc - Convert to skcipher
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch converts cbc over to the skcipher interface.  It also
rearranges the code to allow it to be reused by drivers.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 crypto/cbc.c |  242 +++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 138 insertions(+), 104 deletions(-)

diff --git a/crypto/cbc.c b/crypto/cbc.c
index 780ee27..6539b38 100644
--- a/crypto/cbc.c
+++ b/crypto/cbc.c
@@ -10,42 +10,39 @@
  *
  */
 
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
 #include <linux/module.h>
-#include <linux/scatterlist.h>
 #include <linux/slab.h>
 
 struct crypto_cbc_ctx {
 	struct crypto_cipher *child;
 };
 
-static int crypto_cbc_setkey(struct crypto_tfm *parent, const u8 *key,
+static int crypto_cbc_setkey(struct crypto_skcipher *parent, const u8 *key,
 			     unsigned int keylen)
 {
-	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(parent);
+	struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(parent);
 	struct crypto_cipher *child = ctx->child;
 	int err;
 
 	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
+	crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
 				       CRYPTO_TFM_REQ_MASK);
 	err = crypto_cipher_setkey(child, key, keylen);
-	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-				     CRYPTO_TFM_RES_MASK);
+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
 	return err;
 }
 
-static int crypto_cbc_encrypt_segment(struct blkcipher_desc *desc,
-				      struct blkcipher_walk *walk,
-				      struct crypto_cipher *tfm)
+static inline int crypto_cbc_encrypt_segment(
+	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_encrypt;
-	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned int bsize = crypto_skcipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
 	u8 *dst = walk->dst.virt.addr;
@@ -53,7 +50,7 @@ static int crypto_cbc_encrypt_segment(struct blkcipher_desc *desc,
 
 	do {
 		crypto_xor(iv, src, bsize);
-		fn(crypto_cipher_tfm(tfm), dst, iv);
+		fn(tfm, iv, dst);
 		memcpy(iv, dst, bsize);
 
 		src += bsize;
@@ -63,20 +60,18 @@ static int crypto_cbc_encrypt_segment(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_cbc_encrypt_inplace(struct blkcipher_desc *desc,
-				      struct blkcipher_walk *walk,
-				      struct crypto_cipher *tfm)
+static inline int crypto_cbc_encrypt_inplace(
+	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_encrypt;
-	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned int bsize = crypto_skcipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
 	u8 *iv = walk->iv;
 
 	do {
 		crypto_xor(src, iv, bsize);
-		fn(crypto_cipher_tfm(tfm), src, src);
+		fn(tfm, src, src);
 		iv = src;
 
 		src += bsize;
@@ -87,44 +82,52 @@ static int crypto_cbc_encrypt_inplace(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_cbc_encrypt(struct blkcipher_desc *desc,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
+static inline int crypto_cbc_encrypt_walk(struct skcipher_request *req,
+					  void (*fn)(struct crypto_skcipher *,
+						     const u8 *, u8 *))
 {
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
-	struct crypto_cipher *child = ctx->child;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct skcipher_walk walk;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk.nbytes)) {
+	while (walk.nbytes) {
 		if (walk.src.virt.addr == walk.dst.virt.addr)
-			nbytes = crypto_cbc_encrypt_inplace(desc, &walk, child);
+			err = crypto_cbc_encrypt_inplace(&walk, tfm, fn);
 		else
-			nbytes = crypto_cbc_encrypt_segment(desc, &walk, child);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+			err = crypto_cbc_encrypt_segment(&walk, tfm, fn);
+		err = skcipher_walk_done(&walk, err);
 	}
 
 	return err;
 }
 
-static int crypto_cbc_decrypt_segment(struct blkcipher_desc *desc,
-				      struct blkcipher_walk *walk,
-				      struct crypto_cipher *tfm)
+static inline void crypto_cbc_encrypt_one(struct crypto_skcipher *tfm,
+					  const u8 *src, u8 *dst)
+{
+	struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_cipher_encrypt_one(ctx->child, dst, src);
+}
+
+static int crypto_cbc_encrypt(struct skcipher_request *req)
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_decrypt;
-	int bsize = crypto_cipher_blocksize(tfm);
+	return crypto_cbc_encrypt_walk(req, crypto_cbc_encrypt_one);
+}
+
+static inline int crypto_cbc_decrypt_segment(
+	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
+{
+	unsigned int bsize = crypto_skcipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
 	u8 *dst = walk->dst.virt.addr;
 	u8 *iv = walk->iv;
 
 	do {
-		fn(crypto_cipher_tfm(tfm), dst, src);
+		fn(tfm, src, dst);
 		crypto_xor(dst, iv, bsize);
 		iv = src;
 
@@ -137,13 +140,11 @@ static int crypto_cbc_decrypt_segment(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_cbc_decrypt_inplace(struct blkcipher_desc *desc,
-				      struct blkcipher_walk *walk,
-				      struct crypto_cipher *tfm)
+static inline int crypto_cbc_decrypt_inplace(
+	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_decrypt;
-	int bsize = crypto_cipher_blocksize(tfm);
+	unsigned int bsize = crypto_skcipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
 	u8 last_iv[bsize];
@@ -153,7 +154,7 @@ static int crypto_cbc_decrypt_inplace(struct blkcipher_desc *desc,
 	memcpy(last_iv, src, bsize);
 
 	for (;;) {
-		fn(crypto_cipher_tfm(tfm), src, src);
+		fn(tfm, src, src);
 		if ((nbytes -= bsize) < bsize)
 			break;
 		crypto_xor(src, src - bsize, bsize);
@@ -166,35 +167,46 @@ static int crypto_cbc_decrypt_inplace(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_cbc_decrypt(struct blkcipher_desc *desc,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
+static inline int crypto_cbc_decrypt_blocks(
+	struct skcipher_walk *walk, struct crypto_skcipher *tfm,
+	void (*fn)(struct crypto_skcipher *, const u8 *, u8 *))
 {
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
-	struct crypto_cipher *child = ctx->child;
+	if (walk->src.virt.addr == walk->dst.virt.addr)
+		return crypto_cbc_decrypt_inplace(walk, tfm, fn);
+	else
+		return crypto_cbc_decrypt_segment(walk, tfm, fn);
+}
+
+static inline void crypto_cbc_decrypt_one(struct crypto_skcipher *tfm,
+					  const u8 *src, u8 *dst)
+{
+	struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_cipher_decrypt_one(ctx->child, dst, src);
+}
+
+static int crypto_cbc_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct skcipher_walk walk;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk.nbytes)) {
-		if (walk.src.virt.addr == walk.dst.virt.addr)
-			nbytes = crypto_cbc_decrypt_inplace(desc, &walk, child);
-		else
-			nbytes = crypto_cbc_decrypt_segment(desc, &walk, child);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+	while (walk.nbytes) {
+		err = crypto_cbc_decrypt_blocks(&walk, tfm,
+						crypto_cbc_decrypt_one);
+		err = skcipher_walk_done(&walk, err);
 	}
 
 	return err;
 }
 
-static int crypto_cbc_init_tfm(struct crypto_tfm *tfm)
+static int crypto_cbc_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+	struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_cipher *cipher;
 
 	cipher = crypto_spawn_cipher(spawn);
@@ -205,72 +217,94 @@ static int crypto_cbc_init_tfm(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static void crypto_cbc_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_cbc_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
 	crypto_free_cipher(ctx->child);
 }
 
-static struct crypto_instance *crypto_cbc_alloc(struct rtattr **tb)
+static void crypto_cbc_free(struct skcipher_instance *inst)
 {
-	struct crypto_instance *inst;
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct skcipher_instance *inst;
+	struct crypto_spawn *spawn;
 	struct crypto_alg *alg;
 	int err;
 
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER);
 	if (err)
-		return ERR_PTR(err);
+		return err;
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
 
 	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
 				  CRYPTO_ALG_TYPE_MASK);
+	err = PTR_ERR(alg);
 	if (IS_ERR(alg))
-		return ERR_CAST(alg);
+		goto err_free_inst;
 
-	inst = ERR_PTR(-EINVAL);
-	if (!is_power_of_2(alg->cra_blocksize))
-		goto out_put_alg;
+	spawn = skcipher_instance_ctx(inst);
+	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	crypto_mod_put(alg);
+	if (err)
+		goto err_free_inst;
+
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "cbc", alg);
+	if (err)
+		goto err_drop_spawn;
 
-	inst = crypto_alloc_instance("cbc", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	err = -EINVAL;
+	if (!is_power_of_2(alg->cra_blocksize))
+		goto err_drop_spawn;
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = &crypto_blkcipher_type;
+	inst->alg.base.cra_priority = alg->cra_priority;
+	inst->alg.base.cra_blocksize = alg->cra_blocksize;
+	inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
 	/* We access the data as u32s when xoring. */
-	inst->alg.cra_alignmask |= __alignof__(u32) - 1;
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
 
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-	inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
-	inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
+	inst->alg.ivsize = alg->cra_blocksize;
+	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
 
-	inst->alg.cra_ctxsize = sizeof(struct crypto_cbc_ctx);
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_cbc_ctx);
 
-	inst->alg.cra_init = crypto_cbc_init_tfm;
-	inst->alg.cra_exit = crypto_cbc_exit_tfm;
+	inst->alg.init = crypto_cbc_init_tfm;
+	inst->alg.exit = crypto_cbc_exit_tfm;
 
-	inst->alg.cra_blkcipher.setkey = crypto_cbc_setkey;
-	inst->alg.cra_blkcipher.encrypt = crypto_cbc_encrypt;
-	inst->alg.cra_blkcipher.decrypt = crypto_cbc_decrypt;
+	inst->alg.setkey = crypto_cbc_setkey;
+	inst->alg.encrypt = crypto_cbc_encrypt;
+	inst->alg.decrypt = crypto_cbc_decrypt;
 
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
+	inst->free = crypto_cbc_free;
 
-static void crypto_cbc_free(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_spawn(spawn);
+err_free_inst:
 	kfree(inst);
+	goto out;
 }
 
 static struct crypto_template crypto_cbc_tmpl = {
 	.name = "cbc",
-	.alloc = crypto_cbc_alloc,
-	.free = crypto_cbc_free,
+	.create = crypto_cbc_create,
 	.module = THIS_MODULE,
 };
 

^ permalink raw reply related

* [v3 PATCH 10/16] crypto: testmgr - Do not test internal algorithms
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

Currently we manually filter out internal algorithms using a list
in testmgr.  This is dangerous as internal algorithms cannot be
safely used even by testmgr.  This patch ensures that they're never
processed by testmgr at all.

This patch also removes an obsolete bypass for nivciphers which
no longer exist.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 crypto/algboss.c |    8 --
 crypto/testmgr.c |  153 +++----------------------------------------------------
 2 files changed, 11 insertions(+), 150 deletions(-)

diff --git a/crypto/algboss.c b/crypto/algboss.c
index 6e39d9c..ccb85e1 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -247,12 +247,8 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg)
 	memcpy(param->alg, alg->cra_name, sizeof(param->alg));
 	type = alg->cra_flags;
 
-	/* This piece of crap needs to disappear into per-type test hooks. */
-	if (!((type ^ CRYPTO_ALG_TYPE_BLKCIPHER) &
-	      CRYPTO_ALG_TYPE_BLKCIPHER_MASK) && !(type & CRYPTO_ALG_GENIV) &&
-	    ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
-	     CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize :
-					 alg->cra_ablkcipher.ivsize))
+	/* Do not test internal algorithms. */
+	if (type & CRYPTO_ALG_INTERNAL)
 		type |= CRYPTO_ALG_TESTED;
 
 	param->type = type;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index ded50b6..6ac4696 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1625,7 +1625,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver,
 	struct crypto_aead *tfm;
 	int err = 0;
 
-	tfm = crypto_alloc_aead(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_aead(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		printk(KERN_ERR "alg: aead: Failed to load transform for %s: "
 		       "%ld\n", driver, PTR_ERR(tfm));
@@ -1654,7 +1654,7 @@ static int alg_test_cipher(const struct alg_test_desc *desc,
 	struct crypto_cipher *tfm;
 	int err = 0;
 
-	tfm = crypto_alloc_cipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_cipher(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		printk(KERN_ERR "alg: cipher: Failed to load transform for "
 		       "%s: %ld\n", driver, PTR_ERR(tfm));
@@ -1683,7 +1683,7 @@ static int alg_test_skcipher(const struct alg_test_desc *desc,
 	struct crypto_skcipher *tfm;
 	int err = 0;
 
-	tfm = crypto_alloc_skcipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_skcipher(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		printk(KERN_ERR "alg: skcipher: Failed to load transform for "
 		       "%s: %ld\n", driver, PTR_ERR(tfm));
@@ -1750,7 +1750,7 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver,
 	struct crypto_ahash *tfm;
 	int err;
 
-	tfm = crypto_alloc_ahash(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_ahash(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		printk(KERN_ERR "alg: hash: Failed to load transform for %s: "
 		       "%ld\n", driver, PTR_ERR(tfm));
@@ -1778,7 +1778,7 @@ static int alg_test_crc32c(const struct alg_test_desc *desc,
 	if (err)
 		goto out;
 
-	tfm = crypto_alloc_shash(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_shash(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: "
 		       "%ld\n", driver, PTR_ERR(tfm));
@@ -1820,7 +1820,7 @@ static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver,
 	struct crypto_rng *rng;
 	int err;
 
-	rng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	rng = crypto_alloc_rng(driver, type, mask);
 	if (IS_ERR(rng)) {
 		printk(KERN_ERR "alg: cprng: Failed to load transform for %s: "
 		       "%ld\n", driver, PTR_ERR(rng));
@@ -1847,7 +1847,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
 	if (!buf)
 		return -ENOMEM;
 
-	drng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	drng = crypto_alloc_rng(driver, type, mask);
 	if (IS_ERR(drng)) {
 		printk(KERN_ERR "alg: drbg: could not allocate DRNG handle for "
 		       "%s\n", driver);
@@ -2041,7 +2041,7 @@ static int alg_test_kpp(const struct alg_test_desc *desc, const char *driver,
 	struct crypto_kpp *tfm;
 	int err = 0;
 
-	tfm = crypto_alloc_kpp(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_kpp(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		pr_err("alg: kpp: Failed to load tfm for %s: %ld\n",
 		       driver, PTR_ERR(tfm));
@@ -2200,7 +2200,7 @@ static int alg_test_akcipher(const struct alg_test_desc *desc,
 	struct crypto_akcipher *tfm;
 	int err = 0;
 
-	tfm = crypto_alloc_akcipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
+	tfm = crypto_alloc_akcipher(driver, type, mask);
 	if (IS_ERR(tfm)) {
 		pr_err("alg: akcipher: Failed to load tfm for %s: %ld\n",
 		       driver, PTR_ERR(tfm));
@@ -2223,88 +2223,6 @@ static int alg_test_null(const struct alg_test_desc *desc,
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
 	{
-		.alg = "__cbc-cast5-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__cbc-cast6-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__cbc-serpent-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__cbc-serpent-avx2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__cbc-serpent-sse2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__cbc-twofish-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-aes-aesni",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "__driver-cbc-camellia-aesni",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-camellia-aesni-avx2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-cast5-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-cast6-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-serpent-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-serpent-avx2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-serpent-sse2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-cbc-twofish-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-aes-aesni",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "__driver-ecb-camellia-aesni",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-camellia-aesni-avx2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-cast5-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-cast6-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-serpent-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-serpent-avx2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-serpent-sse2",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-ecb-twofish-avx",
-		.test = alg_test_null,
-	}, {
-		.alg = "__driver-gcm-aes-aesni",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "__ghash-pclmulqdqni",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
 		.alg = "ansi_cprng",
 		.test = alg_test_cprng,
 		.suite = {
@@ -2791,55 +2709,6 @@ static int alg_test_null(const struct alg_test_desc *desc,
 			}
 		}
 	}, {
-		.alg = "cryptd(__driver-cbc-aes-aesni)",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "cryptd(__driver-cbc-camellia-aesni)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-cbc-camellia-aesni-avx2)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-cbc-serpent-avx2)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-aes-aesni)",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "cryptd(__driver-ecb-camellia-aesni)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-camellia-aesni-avx2)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-cast5-avx)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-cast6-avx)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-serpent-avx)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-serpent-avx2)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-serpent-sse2)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-ecb-twofish-avx)",
-		.test = alg_test_null,
-	}, {
-		.alg = "cryptd(__driver-gcm-aes-aesni)",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
-		.alg = "cryptd(__ghash-pclmulqdqni)",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
 		.alg = "ctr(aes)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,
@@ -3166,10 +3035,6 @@ static int alg_test_null(const struct alg_test_desc *desc,
 		.fips_allowed = 1,
 		.test = alg_test_null,
 	}, {
-		.alg = "ecb(__aes-aesni)",
-		.test = alg_test_null,
-		.fips_allowed = 1,
-	}, {
 		.alg = "ecb(aes)",
 		.test = alg_test_skcipher,
 		.fips_allowed = 1,

^ permalink raw reply related

* [v3 PATCH 13/16] crypto: aes-ce - Convert to skcipher
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch converts aes-ce over to the skcipher interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 arch/arm/crypto/aes-ce-glue.c |  390 ++++++++++++++++--------------------------
 1 file changed, 157 insertions(+), 233 deletions(-)

diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 04410d9..4a507c6 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -12,8 +12,8 @@
 #include <asm/neon.h>
 #include <asm/hwcap.h>
 #include <crypto/aes.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 #include <crypto/xts.h>
 
@@ -133,17 +133,17 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 	return 0;
 }
 
-static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+static int ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 			 unsigned int key_len)
 {
-	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int ret;
 
 	ret = ce_aes_expandkey(ctx, in_key, key_len);
 	if (!ret)
 		return 0;
 
-	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
@@ -152,13 +152,13 @@ struct crypto_aes_xts_ctx {
 	struct crypto_aes_ctx __aligned(8) key2;
 };
 
-static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 		       unsigned int key_len)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int ret;
 
-	ret = xts_check_key(tfm, in_key, key_len);
+	ret = xts_verify_key(tfm, in_key, key_len);
 	if (ret)
 		return ret;
 
@@ -169,130 +169,113 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	if (!ret)
 		return 0;
 
-	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int blocks;
 	int err;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int blocks;
 	int err;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key_dec, num_rounds(ctx), blocks);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int blocks;
 	int err;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
 				   walk.iv);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int blocks;
 	int err;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key_dec, num_rounds(ctx), blocks,
 				   walk.iv);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	int err, blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
 		ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
 				   walk.iv);
-		nbytes -= blocks * AES_BLOCK_SIZE;
-		if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
-			break;
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	if (walk.nbytes % AES_BLOCK_SIZE) {
-		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
-		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+	if (walk.nbytes) {
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];
+		unsigned int nbytes = walk.nbytes;
+		u8 *tdst = walk.dst.virt.addr;
+		u8 *tsrc = walk.src.virt.addr;
 
 		/*
 		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
@@ -303,231 +286,172 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
 				   num_rounds(ctx), blocks, walk.iv);
 		memcpy(tdst, tail, nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = num_rounds(&ctx->key1);
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key1.key_enc, rounds, blocks,
 				   walk.iv, (u8 *)ctx->key2.key_enc, first);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = num_rounds(&ctx->key1);
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   (u8 *)ctx->key1.key_dec, rounds, blocks,
 				   walk.iv, (u8 *)ctx->key2.key_enc, first);
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static struct crypto_alg aes_algs[] = { {
-	.cra_name		= "__ecb-aes-ce",
-	.cra_driver_name	= "__driver-ecb-aes-ce",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= 0,
-		.setkey		= ce_aes_setkey,
-		.encrypt	= ecb_encrypt,
-		.decrypt	= ecb_decrypt,
+static struct skcipher_alg aes_algs[] = { {
+	.base = {
+		.cra_name		= "__ecb(aes)",
+		.cra_driver_name	= "__ecb-aes-ce",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.setkey		= ce_aes_setkey,
+	.encrypt	= ecb_encrypt,
+	.decrypt	= ecb_decrypt,
 }, {
-	.cra_name		= "__cbc-aes-ce",
-	.cra_driver_name	= "__driver-cbc-aes-ce",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ce_aes_setkey,
-		.encrypt	= cbc_encrypt,
-		.decrypt	= cbc_decrypt,
+	.base = {
+		.cra_name		= "__cbc(aes)",
+		.cra_driver_name	= "__cbc-aes-ce",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= ce_aes_setkey,
+	.encrypt	= cbc_encrypt,
+	.decrypt	= cbc_decrypt,
 }, {
-	.cra_name		= "__ctr-aes-ce",
-	.cra_driver_name	= "__driver-ctr-aes-ce",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ce_aes_setkey,
-		.encrypt	= ctr_encrypt,
-		.decrypt	= ctr_encrypt,
+	.base = {
+		.cra_name		= "__ctr(aes)",
+		.cra_driver_name	= "__ctr-aes-ce",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
+	.setkey		= ce_aes_setkey,
+	.encrypt	= ctr_encrypt,
+	.decrypt	= ctr_encrypt,
 }, {
-	.cra_name		= "__xts-aes-ce",
-	.cra_driver_name	= "__driver-xts-aes-ce",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= xts_set_key,
-		.encrypt	= xts_encrypt,
-		.decrypt	= xts_decrypt,
+	.base = {
+		.cra_name		= "__xts(aes)",
+		.cra_driver_name	= "__xts-aes-ce",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
-}, {
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "ecb-aes-ce",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= 0,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "cbc-aes-ce",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "ctr-aes-ce",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "xts(aes)",
-	.cra_driver_name	= "xts-aes-ce",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
+	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+	.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= xts_set_key,
+	.encrypt	= xts_encrypt,
+	.decrypt	= xts_decrypt,
 } };
 
+struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
+
+static void aes_exit(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
+		simd_skcipher_free(aes_simd_algs[i]);
+
+	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
 static int __init aes_init(void)
 {
+	struct simd_skcipher_alg *simd;
+	const char *basename;
+	const char *algname;
+	const char *drvname;
+	int err;
+	int i;
+
 	if (!(elf_hwcap2 & HWCAP2_AES))
 		return -ENODEV;
-	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
-}
 
-static void __exit aes_exit(void)
-{
-	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+		algname = aes_algs[i].base.cra_name + 2;
+		drvname = aes_algs[i].base.cra_driver_name + 2;
+		basename = aes_algs[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto unregister_simds;
+
+		aes_simd_algs[i] = simd;
+	}
+
+	return 0;
+
+unregister_simds:
+	aes_exit();
+	return err;
 }
 
 module_init(aes_init);

^ permalink raw reply related

* [v3 PATCH 12/16] crypto: arm64/aes - Convert to skcipher
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch converts arm64/aes over to the skcipher interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 arch/arm64/crypto/aes-glue.c |  382 +++++++++++++++++--------------------------
 1 file changed, 158 insertions(+), 224 deletions(-)

diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 6b2aa0f..24f6137 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -11,8 +11,8 @@
 #include <asm/neon.h>
 #include <asm/hwcap.h>
 #include <crypto/aes.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
 #include <crypto/xts.h>
@@ -80,13 +80,19 @@ struct crypto_aes_xts_ctx {
 	struct crypto_aes_ctx __aligned(8) key2;
 };
 
-static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+			       unsigned int key_len)
+{
+	return aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
+static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 		       unsigned int key_len)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int ret;
 
-	ret = xts_check_key(tfm, in_key, key_len);
+	ret = xts_verify_key(tfm, in_key, key_len);
 	if (ret)
 		return ret;
 
@@ -97,111 +103,101 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	if (!ret)
 		return 0;
 
-	tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 	return -EINVAL;
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_enc, rounds, blocks, first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_dec, rounds, blocks, first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
 				first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_dec, rounds, blocks, walk.iv,
 				first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 	return err;
 }
 
-static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int ctr_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	first = 1;
 	kernel_neon_begin();
@@ -209,17 +205,13 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_enc, rounds, blocks, walk.iv,
 				first);
-		first = 0;
-		nbytes -= blocks * AES_BLOCK_SIZE;
-		if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
-			break;
-		err = blkcipher_walk_done(desc, &walk,
-					  walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	if (walk.nbytes % AES_BLOCK_SIZE) {
-		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
-		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+	if (walk.nbytes) {
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];
+		unsigned int nbytes = walk.nbytes;
+		u8 *tdst = walk.dst.virt.addr;
+		u8 *tsrc = walk.src.virt.addr;
 
 		/*
 		 * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
@@ -230,227 +222,169 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
 				blocks, walk.iv, first);
 		memcpy(tdst, tail, nbytes);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key1.key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_enc, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key1.key_length / 4;
-	struct blkcipher_walk walk;
+	struct skcipher_walk walk;
 	unsigned int blocks;
 
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_neon_begin();
 	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
 		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key1.key_dec, rounds, blocks,
 				(u8 *)ctx->key2.key_enc, walk.iv, first);
-		err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	kernel_neon_end();
 
 	return err;
 }
 
-static struct crypto_alg aes_algs[] = { {
-	.cra_name		= "__ecb-aes-" MODE,
-	.cra_driver_name	= "__driver-ecb-aes-" MODE,
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= 0,
-		.setkey		= aes_setkey,
-		.encrypt	= ecb_encrypt,
-		.decrypt	= ecb_decrypt,
+static struct skcipher_alg aes_algs[] = { {
+	.base = {
+		.cra_name		= "__ecb(aes)",
+		.cra_driver_name	= "__ecb-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= ecb_encrypt,
+	.decrypt	= ecb_decrypt,
 }, {
-	.cra_name		= "__cbc-aes-" MODE,
-	.cra_driver_name	= "__driver-cbc-aes-" MODE,
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= aes_setkey,
-		.encrypt	= cbc_encrypt,
-		.decrypt	= cbc_decrypt,
+	.base = {
+		.cra_name		= "__cbc(aes)",
+		.cra_driver_name	= "__cbc-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= cbc_encrypt,
+	.decrypt	= cbc_decrypt,
 }, {
-	.cra_name		= "__ctr-aes-" MODE,
-	.cra_driver_name	= "__driver-ctr-aes-" MODE,
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= aes_setkey,
-		.encrypt	= ctr_encrypt,
-		.decrypt	= ctr_encrypt,
+	.base = {
+		.cra_name		= "__ctr(aes)",
+		.cra_driver_name	= "__ctr-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= ctr_encrypt,
+	.decrypt	= ctr_encrypt,
 }, {
-	.cra_name		= "__xts-aes-" MODE,
-	.cra_driver_name	= "__driver-xts-aes-" MODE,
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_blkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= xts_set_key,
-		.encrypt	= xts_encrypt,
-		.decrypt	= xts_decrypt,
+	.base = {
+		.cra_name		= "__xts(aes)",
+		.cra_driver_name	= "__xts-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
+		.cra_alignmask		= 7,
+		.cra_module		= THIS_MODULE,
 	},
-}, {
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "ecb-aes-" MODE,
-	.cra_priority		= PRIO,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= 0,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "cbc-aes-" MODE,
-	.cra_priority		= PRIO,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "ctr-aes-" MODE,
-	.cra_priority		= PRIO,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= AES_MIN_KEY_SIZE,
-		.max_keysize	= AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
-}, {
-	.cra_name		= "xts(aes)",
-	.cra_driver_name	= "xts-aes-" MODE,
-	.cra_priority		= PRIO,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 7,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_ablkcipher = {
-		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-		.ivsize		= AES_BLOCK_SIZE,
-		.setkey		= ablk_set_key,
-		.encrypt	= ablk_encrypt,
-		.decrypt	= ablk_decrypt,
-	}
+	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+	.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= xts_set_key,
+	.encrypt	= xts_encrypt,
+	.decrypt	= xts_decrypt,
 } };
 
-static int __init aes_init(void)
+struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
+
+static void aes_exit(void)
 {
-	return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
+		simd_skcipher_free(aes_simd_algs[i]);
+
+	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
 }
 
-static void __exit aes_exit(void)
+static int __init aes_init(void)
 {
-	crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+	struct simd_skcipher_alg *simd;
+	const char *basename;
+	const char *algname;
+	const char *drvname;
+	int err;
+	int i;
+
+	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
+	if (err)
+		return err;
+
+	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+		algname = aes_algs[i].base.cra_name + 2;
+		drvname = aes_algs[i].base.cra_driver_name + 2;
+		basename = aes_algs[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto unregister_simds;
+
+		aes_simd_algs[i] = simd;
+	}
+
+	return 0;
+
+unregister_simds:
+	aes_exit();
+	return err;
 }
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS

^ permalink raw reply related

* [v3 PATCH 8/16] crypto: pcbc - Convert to skcipher
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch converts lrw over to the skcipher interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 crypto/pcbc.c |  201 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 109 insertions(+), 92 deletions(-)

diff --git a/crypto/pcbc.c b/crypto/pcbc.c
index f654965..e4538e0 100644
--- a/crypto/pcbc.c
+++ b/crypto/pcbc.c
@@ -14,40 +14,37 @@
  *
  */
 
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/scatterlist.h>
 #include <linux/slab.h>
 
 struct crypto_pcbc_ctx {
 	struct crypto_cipher *child;
 };
 
-static int crypto_pcbc_setkey(struct crypto_tfm *parent, const u8 *key,
+static int crypto_pcbc_setkey(struct crypto_skcipher *parent, const u8 *key,
 			      unsigned int keylen)
 {
-	struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(parent);
+	struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(parent);
 	struct crypto_cipher *child = ctx->child;
 	int err;
 
 	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
-				CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+				       CRYPTO_TFM_REQ_MASK);
 	err = crypto_cipher_setkey(child, key, keylen);
-	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-			     CRYPTO_TFM_RES_MASK);
+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
 	return err;
 }
 
-static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc,
-				       struct blkcipher_walk *walk,
+static int crypto_pcbc_encrypt_segment(struct skcipher_request *req,
+				       struct skcipher_walk *walk,
 				       struct crypto_cipher *tfm)
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_encrypt;
 	int bsize = crypto_cipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
@@ -56,7 +53,7 @@ static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc,
 
 	do {
 		crypto_xor(iv, src, bsize);
-		fn(crypto_cipher_tfm(tfm), dst, iv);
+		crypto_cipher_encrypt_one(tfm, dst, iv);
 		memcpy(iv, dst, bsize);
 		crypto_xor(iv, src, bsize);
 
@@ -67,12 +64,10 @@ static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc,
-				       struct blkcipher_walk *walk,
+static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req,
+				       struct skcipher_walk *walk,
 				       struct crypto_cipher *tfm)
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_encrypt;
 	int bsize = crypto_cipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
@@ -82,7 +77,7 @@ static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc,
 	do {
 		memcpy(tmpbuf, src, bsize);
 		crypto_xor(iv, src, bsize);
-		fn(crypto_cipher_tfm(tfm), src, iv);
+		crypto_cipher_encrypt_one(tfm, src, iv);
 		memcpy(iv, tmpbuf, bsize);
 		crypto_xor(iv, src, bsize);
 
@@ -94,38 +89,34 @@ static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_pcbc_encrypt(struct blkcipher_desc *desc,
-			       struct scatterlist *dst, struct scatterlist *src,
-			       unsigned int nbytes)
+static int crypto_pcbc_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct crypto_pcbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_cipher *child = ctx->child;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
 		if (walk.src.virt.addr == walk.dst.virt.addr)
-			nbytes = crypto_pcbc_encrypt_inplace(desc, &walk,
+			nbytes = crypto_pcbc_encrypt_inplace(req, &walk,
 							     child);
 		else
-			nbytes = crypto_pcbc_encrypt_segment(desc, &walk,
+			nbytes = crypto_pcbc_encrypt_segment(req, &walk,
 							     child);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc,
-				       struct blkcipher_walk *walk,
+static int crypto_pcbc_decrypt_segment(struct skcipher_request *req,
+				       struct skcipher_walk *walk,
 				       struct crypto_cipher *tfm)
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_decrypt;
 	int bsize = crypto_cipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
@@ -133,7 +124,7 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc,
 	u8 *iv = walk->iv;
 
 	do {
-		fn(crypto_cipher_tfm(tfm), dst, src);
+		crypto_cipher_decrypt_one(tfm, dst, src);
 		crypto_xor(dst, iv, bsize);
 		memcpy(iv, src, bsize);
 		crypto_xor(iv, dst, bsize);
@@ -147,21 +138,19 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc,
-				       struct blkcipher_walk *walk,
+static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req,
+				       struct skcipher_walk *walk,
 				       struct crypto_cipher *tfm)
 {
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *) =
-		crypto_cipher_alg(tfm)->cia_decrypt;
 	int bsize = crypto_cipher_blocksize(tfm);
 	unsigned int nbytes = walk->nbytes;
 	u8 *src = walk->src.virt.addr;
 	u8 *iv = walk->iv;
-	u8 tmpbuf[bsize];
+	u8 tmpbuf[bsize] __attribute__ ((aligned(__alignof__(u32))));
 
 	do {
 		memcpy(tmpbuf, src, bsize);
-		fn(crypto_cipher_tfm(tfm), src, src);
+		crypto_cipher_decrypt_one(tfm, src, src);
 		crypto_xor(src, iv, bsize);
 		memcpy(iv, tmpbuf, bsize);
 		crypto_xor(iv, src, bsize);
@@ -174,37 +163,35 @@ static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc,
 	return nbytes;
 }
 
-static int crypto_pcbc_decrypt(struct blkcipher_desc *desc,
-			       struct scatterlist *dst, struct scatterlist *src,
-			       unsigned int nbytes)
+static int crypto_pcbc_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct crypto_pcbc_ctx *ctx = crypto_blkcipher_ctx(tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_cipher *child = ctx->child;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
+	err = skcipher_walk_virt(&walk, req, false);
 
 	while ((nbytes = walk.nbytes)) {
 		if (walk.src.virt.addr == walk.dst.virt.addr)
-			nbytes = crypto_pcbc_decrypt_inplace(desc, &walk,
+			nbytes = crypto_pcbc_decrypt_inplace(req, &walk,
 							     child);
 		else
-			nbytes = crypto_pcbc_decrypt_segment(desc, &walk,
+			nbytes = crypto_pcbc_decrypt_segment(req, &walk,
 							     child);
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return err;
 }
 
-static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm)
+static int crypto_pcbc_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+	struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct crypto_cipher *cipher;
 
 	cipher = crypto_spawn_cipher(spawn);
@@ -215,68 +202,98 @@ static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static void crypto_pcbc_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_pcbc_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
 	crypto_free_cipher(ctx->child);
 }
 
-static struct crypto_instance *crypto_pcbc_alloc(struct rtattr **tb)
+static void crypto_pcbc_free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-	struct crypto_instance *inst;
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct crypto_spawn *spawn;
 	struct crypto_alg *alg;
 	int err;
 
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
-	if (err)
-		return ERR_PTR(err);
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return PTR_ERR(algt);
+
+	if (((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) &
+	    ~CRYPTO_ALG_INTERNAL)
+		return -EINVAL;
 
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER |
+				      (algt->type & CRYPTO_ALG_INTERNAL),
+				  CRYPTO_ALG_TYPE_MASK |
+				  (algt->mask & CRYPTO_ALG_INTERNAL));
+	err = PTR_ERR(alg);
 	if (IS_ERR(alg))
-		return ERR_CAST(alg);
+		goto err_free_inst;
+
+	spawn = skcipher_instance_ctx(inst);
+	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	crypto_mod_put(alg);
+	if (err)
+		goto err_free_inst;
 
-	inst = crypto_alloc_instance("pcbc", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "pcbc", alg);
+	if (err)
+		goto err_drop_spawn;
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = &crypto_blkcipher_type;
+	inst->alg.base.cra_flags = alg->cra_flags & CRYPTO_ALG_INTERNAL;
+	inst->alg.base.cra_priority = alg->cra_priority;
+	inst->alg.base.cra_blocksize = alg->cra_blocksize;
+	inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
 	/* We access the data as u32s when xoring. */
-	inst->alg.cra_alignmask |= __alignof__(u32) - 1;
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
 
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-	inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
-	inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
+	inst->alg.ivsize = alg->cra_blocksize;
+	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
+	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
 
-	inst->alg.cra_ctxsize = sizeof(struct crypto_pcbc_ctx);
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_pcbc_ctx);
 
-	inst->alg.cra_init = crypto_pcbc_init_tfm;
-	inst->alg.cra_exit = crypto_pcbc_exit_tfm;
+	inst->alg.init = crypto_pcbc_init_tfm;
+	inst->alg.exit = crypto_pcbc_exit_tfm;
 
-	inst->alg.cra_blkcipher.setkey = crypto_pcbc_setkey;
-	inst->alg.cra_blkcipher.encrypt = crypto_pcbc_encrypt;
-	inst->alg.cra_blkcipher.decrypt = crypto_pcbc_decrypt;
+	inst->alg.setkey = crypto_pcbc_setkey;
+	inst->alg.encrypt = crypto_pcbc_encrypt;
+	inst->alg.decrypt = crypto_pcbc_decrypt;
 
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
+	inst->free = crypto_pcbc_free;
 
-static void crypto_pcbc_free(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_spawn(spawn);
+err_free_inst:
 	kfree(inst);
+	goto out;
 }
 
 static struct crypto_template crypto_pcbc_tmpl = {
 	.name = "pcbc",
-	.alloc = crypto_pcbc_alloc,
-	.free = crypto_pcbc_free,
+	.create = crypto_pcbc_create,
 	.module = THIS_MODULE,
 };
 

^ permalink raw reply related

* [v3 PATCH 11/16] crypto: aesni - Convert to skcipher
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch converts aesni (including fpu) over to the skcipher
interface.  The LRW implementation has been removed as the generic
LRW code can now be used directly on top of the accelerated ECB
implementation.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 arch/x86/crypto/aesni-intel_glue.c |  705 +++++++++++--------------------------
 arch/x86/crypto/fpu.c              |  207 ++++++----
 crypto/Kconfig                     |    7 
 3 files changed, 346 insertions(+), 573 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 0ab5ee1..f466019 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -21,7 +21,6 @@
 
 #include <linux/hardirq.h>
 #include <linux/types.h>
-#include <linux/crypto.h>
 #include <linux/module.h>
 #include <linux/err.h>
 #include <crypto/algapi.h>
@@ -29,14 +28,14 @@
 #include <crypto/cryptd.h>
 #include <crypto/ctr.h>
 #include <crypto/b128ops.h>
-#include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <asm/cpu_device_id.h>
 #include <asm/fpu/api.h>
 #include <asm/crypto/aes.h>
-#include <crypto/ablk_helper.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #ifdef CONFIG_X86_64
@@ -45,28 +44,26 @@
 
 
 #define AESNI_ALIGN	16
+#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN)))
 #define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE - 1))
 #define RFC4106_HASH_SUBKEY_SIZE 16
+#define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
+#define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA)
+#define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA)
 
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
  */
 struct aesni_rfc4106_gcm_ctx {
-	u8 hash_subkey[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
-	struct crypto_aes_ctx aes_key_expanded
-		__attribute__ ((__aligned__(AESNI_ALIGN)));
+	u8 hash_subkey[16] AESNI_ALIGN_ATTR;
+	struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
 	u8 nonce[4];
 };
 
-struct aesni_lrw_ctx {
-	struct lrw_table_ctx lrw_table;
-	u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
-};
-
 struct aesni_xts_ctx {
-	u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
-	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+	u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
+	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
 };
 
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -360,96 +357,95 @@ static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	aesni_dec(ctx, dst, src);
 }
 
-static int ecb_encrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			         unsigned int len)
+{
+	return aes_set_key_common(crypto_skcipher_tfm(tfm),
+				  crypto_skcipher_ctx(tfm), key, len);
+}
+
+static int ecb_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
 		aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	kernel_fpu_end();
 
 	return err;
 }
 
-static int ecb_decrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
 		aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	kernel_fpu_end();
 
 	return err;
 }
 
-static int cbc_encrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
 		aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK, walk.iv);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	kernel_fpu_end();
 
 	return err;
 }
 
-static int cbc_decrypt(struct blkcipher_desc *desc,
-		       struct scatterlist *dst, struct scatterlist *src,
-		       unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt(desc, &walk);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes)) {
 		aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			      nbytes & AES_BLOCK_MASK, walk.iv);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	kernel_fpu_end();
 
@@ -458,7 +454,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
 
 #ifdef CONFIG_X86_64
 static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
-			    struct blkcipher_walk *walk)
+			    struct skcipher_walk *walk)
 {
 	u8 *ctrblk = walk->iv;
 	u8 keystream[AES_BLOCK_SIZE];
@@ -491,157 +487,53 @@ static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
 }
 #endif
 
-static int ctr_crypt(struct blkcipher_desc *desc,
-		     struct scatterlist *dst, struct scatterlist *src,
-		     unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
 {
-	struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm));
-	struct blkcipher_walk walk;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+	struct skcipher_walk walk;
+	unsigned int nbytes;
 	int err;
 
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	err = skcipher_walk_virt(&walk, req, true);
 
 	kernel_fpu_begin();
 	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
 		aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
 			              nbytes & AES_BLOCK_MASK, walk.iv);
 		nbytes &= AES_BLOCK_SIZE - 1;
-		err = blkcipher_walk_done(desc, &walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes);
 	}
 	if (walk.nbytes) {
 		ctr_crypt_final(ctx, &walk);
-		err = blkcipher_walk_done(desc, &walk, 0);
+		err = skcipher_walk_done(&walk, 0);
 	}
 	kernel_fpu_end();
 
 	return err;
 }
-#endif
-
-static int ablk_ecb_init(struct crypto_tfm *tfm)
-{
-	return ablk_init_common(tfm, "__driver-ecb-aes-aesni");
-}
-
-static int ablk_cbc_init(struct crypto_tfm *tfm)
-{
-	return ablk_init_common(tfm, "__driver-cbc-aes-aesni");
-}
-
-#ifdef CONFIG_X86_64
-static int ablk_ctr_init(struct crypto_tfm *tfm)
-{
-	return ablk_init_common(tfm, "__driver-ctr-aes-aesni");
-}
-
-#endif
-
-#if IS_ENABLED(CONFIG_CRYPTO_PCBC)
-static int ablk_pcbc_init(struct crypto_tfm *tfm)
-{
-	return ablk_init_common(tfm, "fpu(pcbc(__driver-aes-aesni))");
-}
-#endif
-
-static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
-{
-	aesni_ecb_enc(ctx, blks, blks, nbytes);
-}
 
-static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
-{
-	aesni_ecb_dec(ctx, blks, blks, nbytes);
-}
-
-static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			    unsigned int keylen)
 {
-	struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err;
 
-	err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key,
-				 keylen - AES_BLOCK_SIZE);
+	err = xts_verify_key(tfm, key, keylen);
 	if (err)
 		return err;
 
-	return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE);
-}
-
-static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	lrw_free_table(&ctx->lrw_table);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[8];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
-		.crypt_fn = lrw_xts_encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	kernel_fpu_begin();
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	kernel_fpu_end();
-
-	return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[8];
-	struct lrw_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.table_ctx = &ctx->lrw_table,
-		.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
-		.crypt_fn = lrw_xts_decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	kernel_fpu_begin();
-	ret = lrw_crypt(desc, dst, src, nbytes, &req);
-	kernel_fpu_end();
-
-	return ret;
-}
-
-static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
-			    unsigned int keylen)
-{
-	struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
-
-	err = xts_check_key(tfm, key, keylen);
-	if (err)
-		return err;
+	keylen /= 2;
 
 	/* first half of xts-key is for crypt */
-	err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2);
+	err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx,
+				 key, keylen);
 	if (err)
 		return err;
 
 	/* second half of xts-key is for tweak */
-	return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2,
-				  keylen / 2);
+	return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx,
+				  key + keylen, keylen);
 }
 
 
@@ -650,8 +542,6 @@ static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
 	aesni_enc(ctx, out, in);
 }
 
-#ifdef CONFIG_X86_64
-
 static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 {
 	glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc));
@@ -698,83 +588,28 @@ static void aesni_xts_dec8(void *ctx, u128 *dst, const u128 *src, le128 *iv)
 	} }
 };
 
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
 {
-	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return glue_xts_crypt_128bit(&aesni_enc_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(aesni_xts_tweak),
-				     aes_ctx(ctx->raw_tweak_ctx),
-				     aes_ctx(ctx->raw_crypt_ctx));
+	return glue_xts_req_128bit(&aesni_enc_xts, req,
+				   XTS_TWEAK_CAST(aesni_xts_tweak),
+				   aes_ctx(ctx->raw_tweak_ctx),
+				   aes_ctx(ctx->raw_crypt_ctx));
 }
 
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
 {
-	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
-	return glue_xts_crypt_128bit(&aesni_dec_xts, desc, dst, src, nbytes,
-				     XTS_TWEAK_CAST(aesni_xts_tweak),
-				     aes_ctx(ctx->raw_tweak_ctx),
-				     aes_ctx(ctx->raw_crypt_ctx));
-}
-
-#else
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[8];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
-		.tweak_fn = aesni_xts_tweak,
-		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
-		.crypt_fn = lrw_xts_encrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	kernel_fpu_begin();
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	kernel_fpu_end();
-
-	return ret;
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		       struct scatterlist *src, unsigned int nbytes)
-{
-	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	be128 buf[8];
-	struct xts_crypt_req req = {
-		.tbuf = buf,
-		.tbuflen = sizeof(buf),
-
-		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
-		.tweak_fn = aesni_xts_tweak,
-		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
-		.crypt_fn = lrw_xts_decrypt_callback,
-	};
-	int ret;
-
-	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
-	kernel_fpu_begin();
-	ret = xts_crypt(desc, dst, src, nbytes, &req);
-	kernel_fpu_end();
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return ret;
+	return glue_xts_req_128bit(&aesni_dec_xts, req,
+				   XTS_TWEAK_CAST(aesni_xts_tweak),
+				   aes_ctx(ctx->raw_tweak_ctx),
+				   aes_ctx(ctx->raw_crypt_ctx));
 }
 
-#endif
-
-#ifdef CONFIG_X86_64
 static int rfc4106_init(struct crypto_aead *aead)
 {
 	struct cryptd_aead *cryptd_tfm;
@@ -1077,9 +912,7 @@ static int rfc4106_decrypt(struct aead_request *req)
 	.cra_priority		= 300,
 	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
-				  AESNI_ALIGN - 1,
-	.cra_alignmask		= 0,
+	.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
 	.cra_module		= THIS_MODULE,
 	.cra_u	= {
 		.cipher	= {
@@ -1091,14 +924,12 @@ static int rfc4106_decrypt(struct aead_request *req)
 		}
 	}
 }, {
-	.cra_name		= "__aes-aesni",
-	.cra_driver_name	= "__driver-aes-aesni",
-	.cra_priority		= 0,
+	.cra_name		= "__aes",
+	.cra_driver_name	= "__aes-aesni",
+	.cra_priority		= 300,
 	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
 	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
-				  AESNI_ALIGN - 1,
-	.cra_alignmask		= 0,
+	.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
 	.cra_module		= THIS_MODULE,
 	.cra_u	= {
 		.cipher	= {
@@ -1109,250 +940,94 @@ static int rfc4106_decrypt(struct aead_request *req)
 			.cia_decrypt		= __aes_decrypt
 		}
 	}
-}, {
-	.cra_name		= "__ecb-aes-aesni",
-	.cra_driver_name	= "__driver-ecb-aes-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
-				  AESNI_ALIGN - 1,
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.setkey		= aes_set_key,
-			.encrypt	= ecb_encrypt,
-			.decrypt	= ecb_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__cbc-aes-aesni",
-	.cra_driver_name	= "__driver-cbc-aes-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
-				  AESNI_ALIGN - 1,
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.setkey		= aes_set_key,
-			.encrypt	= cbc_encrypt,
-			.decrypt	= cbc_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "ecb(aes)",
-	.cra_driver_name	= "ecb-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_ecb_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
+} };
+
+static struct skcipher_alg aesni_skciphers[] = {
+	{
+		.base = {
+			.cra_name		= "__ecb(aes)",
+			.cra_driver_name	= "__ecb-aes-aesni",
+			.cra_priority		= 400,
+			.cra_flags		= CRYPTO_ALG_INTERNAL,
+			.cra_blocksize		= AES_BLOCK_SIZE,
+			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
+			.cra_module		= THIS_MODULE,
 		},
-	},
-}, {
-	.cra_name		= "cbc(aes)",
-	.cra_driver_name	= "cbc-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_cbc_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.setkey		= aesni_skcipher_setkey,
+		.encrypt	= ecb_encrypt,
+		.decrypt	= ecb_decrypt,
+	}, {
+		.base = {
+			.cra_name		= "__cbc(aes)",
+			.cra_driver_name	= "__cbc-aes-aesni",
+			.cra_priority		= 400,
+			.cra_flags		= CRYPTO_ALG_INTERNAL,
+			.cra_blocksize		= AES_BLOCK_SIZE,
+			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
+			.cra_module		= THIS_MODULE,
 		},
-	},
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= aesni_skcipher_setkey,
+		.encrypt	= cbc_encrypt,
+		.decrypt	= cbc_decrypt,
 #ifdef CONFIG_X86_64
-}, {
-	.cra_name		= "__ctr-aes-aesni",
-	.cra_driver_name	= "__driver-ctr-aes-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx) +
-				  AESNI_ALIGN - 1,
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= aes_set_key,
-			.encrypt	= ctr_crypt,
-			.decrypt	= ctr_crypt,
+	}, {
+		.base = {
+			.cra_name		= "__ctr(aes)",
+			.cra_driver_name	= "__ctr-aes-aesni",
+			.cra_priority		= 400,
+			.cra_flags		= CRYPTO_ALG_INTERNAL,
+			.cra_blocksize		= 1,
+			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
+			.cra_module		= THIS_MODULE,
 		},
-	},
-}, {
-	.cra_name		= "ctr(aes)",
-	.cra_driver_name	= "ctr-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= 1,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_ctr_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_encrypt,
-			.geniv		= "chainiv",
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.chunksize	= AES_BLOCK_SIZE,
+		.setkey		= aesni_skcipher_setkey,
+		.encrypt	= ctr_crypt,
+		.decrypt	= ctr_crypt,
+	}, {
+		.base = {
+			.cra_name		= "__xts(aes)",
+			.cra_driver_name	= "__xts-aes-aesni",
+			.cra_priority		= 401,
+			.cra_flags		= CRYPTO_ALG_INTERNAL,
+			.cra_blocksize		= AES_BLOCK_SIZE,
+			.cra_ctxsize		= XTS_AES_CTX_SIZE,
+			.cra_module		= THIS_MODULE,
 		},
-	},
+		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= xts_aesni_setkey,
+		.encrypt	= xts_encrypt,
+		.decrypt	= xts_decrypt,
 #endif
+	}
+};
+
+struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
+
+struct {
+	const char *algname;
+	const char *drvname;
+	const char *basename;
+	struct simd_skcipher_alg *simd;
+} aesni_simd_skciphers2[] = {
 #if IS_ENABLED(CONFIG_CRYPTO_PCBC)
-}, {
-	.cra_name		= "pcbc(aes)",
-	.cra_driver_name	= "pcbc-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_pcbc_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
+	{
+		.algname	= "pcbc(aes)",
+		.drvname	= "pcbc-aes-aesni",
+		.basename	= "fpu(pcbc(__aes-aesni))",
 	},
 #endif
-}, {
-	.cra_name		= "__lrw-aes-aesni",
-	.cra_driver_name	= "__driver-lrw-aes-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct aesni_lrw_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_exit		= lrw_aesni_exit_tfm,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= lrw_aesni_setkey,
-			.encrypt	= lrw_encrypt,
-			.decrypt	= lrw_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "__xts-aes-aesni",
-	.cra_driver_name	= "__driver-xts-aes-aesni",
-	.cra_priority		= 0,
-	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
-				  CRYPTO_ALG_INTERNAL,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct aesni_xts_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_blkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u = {
-		.blkcipher = {
-			.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-			.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= xts_aesni_setkey,
-			.encrypt	= xts_encrypt,
-			.decrypt	= xts_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "lrw(aes)",
-	.cra_driver_name	= "lrw-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-}, {
-	.cra_name		= "xts(aes)",
-	.cra_driver_name	= "xts-aes-aesni",
-	.cra_priority		= 400,
-	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct async_helper_ctx),
-	.cra_alignmask		= 0,
-	.cra_type		= &crypto_ablkcipher_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_init,
-	.cra_exit		= ablk_exit,
-	.cra_u = {
-		.ablkcipher = {
-			.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-			.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-			.ivsize		= AES_BLOCK_SIZE,
-			.setkey		= ablk_set_key,
-			.encrypt	= ablk_encrypt,
-			.decrypt	= ablk_decrypt,
-		},
-	},
-} };
+};
 
 #ifdef CONFIG_X86_64
 static struct aead_alg aesni_aead_algs[] = { {
@@ -1401,9 +1076,27 @@ static int rfc4106_decrypt(struct aead_request *req)
 };
 MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
 
+static void aesni_free_simds(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
+		    aesni_simd_skciphers[i]; i++)
+		simd_skcipher_free(aesni_simd_skciphers[i]);
+
+	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2) &&
+		    aesni_simd_skciphers2[i].simd; i++)
+		simd_skcipher_free(aesni_simd_skciphers2[i].simd);
+}
+
 static int __init aesni_init(void)
 {
+	struct simd_skcipher_alg *simd;
+	const char *basename;
+	const char *algname;
+	const char *drvname;
 	int err;
+	int i;
 
 	if (!x86_match_cpu(aesni_cpu_id))
 		return -ENODEV;
@@ -1445,13 +1138,48 @@ static int __init aesni_init(void)
 	if (err)
 		goto fpu_exit;
 
+	err = crypto_register_skciphers(aesni_skciphers,
+					ARRAY_SIZE(aesni_skciphers));
+	if (err)
+		goto unregister_algs;
+
 	err = crypto_register_aeads(aesni_aead_algs,
 				    ARRAY_SIZE(aesni_aead_algs));
 	if (err)
-		goto unregister_algs;
+		goto unregister_skciphers;
+
+	for (i = 0; i < ARRAY_SIZE(aesni_skciphers); i++) {
+		algname = aesni_skciphers[i].base.cra_name + 2;
+		drvname = aesni_skciphers[i].base.cra_driver_name + 2;
+		basename = aesni_skciphers[i].base.cra_driver_name;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto unregister_simds;
+
+		aesni_simd_skciphers[i] = simd;
+	}
 
-	return err;
+	for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) {
+		algname = aesni_simd_skciphers2[i].algname;
+		drvname = aesni_simd_skciphers2[i].drvname;
+		basename = aesni_simd_skciphers2[i].basename;
+		simd = simd_skcipher_create_compat(algname, drvname, basename);
+		err = PTR_ERR(simd);
+		if (IS_ERR(simd))
+			goto unregister_simds;
 
+		aesni_simd_skciphers2[i].simd = simd;
+	}
+
+	return 0;
+
+unregister_simds:
+	aesni_free_simds();
+	crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
+unregister_skciphers:
+	crypto_unregister_skciphers(aesni_skciphers,
+				    ARRAY_SIZE(aesni_skciphers));
 unregister_algs:
 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 fpu_exit:
@@ -1461,7 +1189,10 @@ static int __init aesni_init(void)
 
 static void __exit aesni_exit(void)
 {
+	aesni_free_simds();
 	crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
+	crypto_unregister_skciphers(aesni_skciphers,
+				    ARRAY_SIZE(aesni_skciphers));
 	crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 
 	crypto_fpu_exit();
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c
index e7d679e..4066804 100644
--- a/arch/x86/crypto/fpu.c
+++ b/arch/x86/crypto/fpu.c
@@ -11,143 +11,186 @@
  *
  */
 
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/crypto.h>
 #include <asm/fpu/api.h>
 
 struct crypto_fpu_ctx {
-	struct crypto_blkcipher *child;
+	struct crypto_skcipher *child;
 };
 
-static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key,
+static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key,
 			     unsigned int keylen)
 {
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent);
-	struct crypto_blkcipher *child = ctx->child;
+	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_skcipher *child = ctx->child;
 	int err;
 
-	crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) &
-				   CRYPTO_TFM_REQ_MASK);
-	err = crypto_blkcipher_setkey(child, key, keylen);
-	crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) &
-				     CRYPTO_TFM_RES_MASK);
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
 	return err;
 }
 
-static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
+static int crypto_fpu_encrypt(struct skcipher_request *req)
 {
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child = ctx->child;
+	SKCIPHER_REQUEST_ON_STACK(subreq, child);
 	int err;
-	struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
-	struct crypto_blkcipher *child = ctx->child;
-	struct blkcipher_desc desc = {
-		.tfm = child,
-		.info = desc_in->info,
-		.flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
-	};
+
+	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_callback(subreq, 0, NULL, NULL);
+	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+				   req->iv);
 
 	kernel_fpu_begin();
-	err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes);
+	err = crypto_skcipher_encrypt(subreq);
 	kernel_fpu_end();
+
+	skcipher_request_zero(subreq);
 	return err;
 }
 
-static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in,
-			      struct scatterlist *dst, struct scatterlist *src,
-			      unsigned int nbytes)
+static int crypto_fpu_decrypt(struct skcipher_request *req)
 {
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child = ctx->child;
+	SKCIPHER_REQUEST_ON_STACK(subreq, child);
 	int err;
-	struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm);
-	struct crypto_blkcipher *child = ctx->child;
-	struct blkcipher_desc desc = {
-		.tfm = child,
-		.info = desc_in->info,
-		.flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
-	};
+
+	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_callback(subreq, 0, NULL, NULL);
+	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+				   req->iv);
 
 	kernel_fpu_begin();
-	err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes);
+	err = crypto_skcipher_decrypt(subreq);
 	kernel_fpu_end();
+
+	skcipher_request_zero(subreq);
 	return err;
 }
 
-static int crypto_fpu_init_tfm(struct crypto_tfm *tfm)
+static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct crypto_blkcipher *cipher;
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher_spawn *spawn;
+	struct crypto_skcipher *cipher;
 
-	cipher = crypto_spawn_blkcipher(spawn);
+	spawn = skcipher_instance_ctx(inst);
+	cipher = crypto_spawn_skcipher(spawn);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
 	ctx->child = cipher;
+
 	return 0;
 }
 
-static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(ctx->child);
+}
+
+static void crypto_fpu_free(struct skcipher_instance *inst)
 {
-	struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm);
-	crypto_free_blkcipher(ctx->child);
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
 }
 
-static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb)
+static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
+	struct crypto_skcipher_spawn *spawn;
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct skcipher_alg *alg;
+	const char *cipher_name;
 	int err;
 
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return PTR_ERR(algt);
+
+	if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) &
+	    algt->mask)
+		return -EINVAL;
+
+	if (!(algt->mask & CRYPTO_ALG_INTERNAL))
+		return -EINVAL;
+
+	cipher_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(cipher_name))
+		return PTR_ERR(cipher_name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	spawn = skcipher_instance_ctx(inst);
+
+	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
+	err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL,
+				   CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
 	if (err)
-		return ERR_PTR(err);
-
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return ERR_CAST(alg);
-
-	inst = crypto_alloc_instance("fpu", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
-
-	inst->alg.cra_flags = alg->cra_flags;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = alg->cra_type;
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize;
-	inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
-	inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
-	inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
-	inst->alg.cra_init = crypto_fpu_init_tfm;
-	inst->alg.cra_exit = crypto_fpu_exit_tfm;
-	inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey;
-	inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt;
-	inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt;
-
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
+		goto out_free_inst;
 
-static void crypto_fpu_free(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
+	alg = crypto_skcipher_spawn_alg(spawn);
+
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu",
+				  &alg->base);
+	if (err)
+		goto out_drop_skcipher;
+
+	inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL;
+	inst->alg.base.cra_priority = alg->base.cra_priority;
+	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
+	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
+
+	inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
+	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
+	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
+
+	inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
+
+	inst->alg.init = crypto_fpu_init_tfm;
+	inst->alg.exit = crypto_fpu_exit_tfm;
+
+	inst->alg.setkey = crypto_fpu_setkey;
+	inst->alg.encrypt = crypto_fpu_encrypt;
+	inst->alg.decrypt = crypto_fpu_decrypt;
+
+	inst->free = crypto_fpu_free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto out_drop_skcipher;
+
+out:
+	return err;
+
+out_drop_skcipher:
+	crypto_drop_skcipher(spawn);
+out_free_inst:
 	kfree(inst);
+	goto out;
 }
 
 static struct crypto_template crypto_fpu_tmpl = {
 	.name = "fpu",
-	.alloc = crypto_fpu_alloc,
-	.free = crypto_fpu_free,
+	.create = crypto_fpu_create,
 	.module = THIS_MODULE,
 };
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 87a09c7..50fa56c 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -942,14 +942,13 @@ config CRYPTO_AES_X86_64
 config CRYPTO_AES_NI_INTEL
 	tristate "AES cipher algorithms (AES-NI)"
 	depends on X86
+	select CRYPTO_AEAD
 	select CRYPTO_AES_X86_64 if 64BIT
 	select CRYPTO_AES_586 if !64BIT
-	select CRYPTO_CRYPTD
-	select CRYPTO_ABLK_HELPER
 	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86 if 64BIT
-	select CRYPTO_LRW
-	select CRYPTO_XTS
+	select CRYPTO_SIMD
 	help
 	  Use Intel AES-NI instructions for AES algorithm.
 

^ permalink raw reply related

* [v3 PATCH 9/16] crypto: glue_helper - Add skcipher xts helpers
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch adds xts helpers that use the skcipher interface rather
than blkcipher.  This will be used by aesni_intel.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 arch/x86/crypto/glue_helper.c             |   74 +++++++++++++++++++++++++++++-
 arch/x86/include/asm/crypto/glue_helper.h |   39 +++++++++++++++
 crypto/Kconfig                            |    2 
 3 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 6a85598..260a060 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -27,10 +27,10 @@
 
 #include <linux/module.h>
 #include <crypto/b128ops.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/lrw.h>
 #include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
-#include <crypto/scatterwalk.h>
 
 static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
 				   struct blkcipher_desc *desc,
@@ -339,6 +339,41 @@ static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
 	return nbytes;
 }
 
+static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+					  void *ctx,
+					  struct skcipher_walk *walk)
+{
+	const unsigned int bsize = 128 / 8;
+	unsigned int nbytes = walk->nbytes;
+	u128 *src = walk->src.virt.addr;
+	u128 *dst = walk->dst.virt.addr;
+	unsigned int num_blocks, func_bytes;
+	unsigned int i;
+
+	/* Process multi-block batch */
+	for (i = 0; i < gctx->num_funcs; i++) {
+		num_blocks = gctx->funcs[i].num_blocks;
+		func_bytes = bsize * num_blocks;
+
+		if (nbytes >= func_bytes) {
+			do {
+				gctx->funcs[i].fn_u.xts(ctx, dst, src,
+							walk->iv);
+
+				src += num_blocks;
+				dst += num_blocks;
+				nbytes -= func_bytes;
+			} while (nbytes >= func_bytes);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+	}
+
+done:
+	return nbytes;
+}
+
 /* for implementations implementing faster XTS IV generator */
 int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
 			  struct blkcipher_desc *desc, struct scatterlist *dst,
@@ -379,6 +414,43 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
 }
 EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
 
+int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+			struct skcipher_request *req,
+			common_glue_func_t tweak_fn, void *tweak_ctx,
+			void *crypt_ctx)
+{
+	const unsigned int bsize = 128 / 8;
+	struct skcipher_walk walk;
+	bool fpu_enabled = false;
+	unsigned int nbytes;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+	nbytes = walk.nbytes;
+	if (!nbytes)
+		return err;
+
+	/* set minimum length to bsize, for tweak_fn */
+	fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					    &walk, fpu_enabled,
+					    nbytes < bsize ? bsize : nbytes);
+
+	/* calculate first value of T */
+	tweak_fn(tweak_ctx, walk.iv, walk.iv);
+
+	while (nbytes) {
+		nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk);
+
+		err = skcipher_walk_done(&walk, nbytes);
+		nbytes = walk.nbytes;
+	}
+
+	glue_fpu_end(fpu_enabled);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
+
 void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv,
 			       common_glue_func_t fn)
 {
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 03bb106..29e53ea 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -5,8 +5,8 @@
 #ifndef _CRYPTO_GLUE_HELPER_H
 #define _CRYPTO_GLUE_HELPER_H
 
+#include <crypto/internal/skcipher.h>
 #include <linux/kernel.h>
-#include <linux/crypto.h>
 #include <asm/fpu/api.h>
 #include <crypto/b128ops.h>
 
@@ -69,6 +69,31 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
 	return true;
 }
 
+static inline bool glue_skwalk_fpu_begin(unsigned int bsize,
+					 int fpu_blocks_limit,
+					 struct skcipher_walk *walk,
+					 bool fpu_enabled, unsigned int nbytes)
+{
+	if (likely(fpu_blocks_limit < 0))
+		return false;
+
+	if (fpu_enabled)
+		return true;
+
+	/*
+	 * Vector-registers are only used when chunk to be processed is large
+	 * enough, so do not enable FPU until it is necessary.
+	 */
+	if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
+		return false;
+
+	/* prevent sleeping if FPU is in use */
+	skcipher_walk_atomise(walk);
+
+	kernel_fpu_begin();
+	return true;
+}
+
 static inline void glue_fpu_end(bool fpu_enabled)
 {
 	if (fpu_enabled)
@@ -139,6 +164,18 @@ extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
 				 common_glue_func_t tweak_fn, void *tweak_ctx,
 				 void *crypt_ctx);
 
+extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
+				 struct blkcipher_desc *desc,
+				 struct scatterlist *dst,
+				 struct scatterlist *src, unsigned int nbytes,
+				 common_glue_func_t tweak_fn, void *tweak_ctx,
+				 void *crypt_ctx);
+
+extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
+			       struct skcipher_request *req,
+			       common_glue_func_t tweak_fn, void *tweak_ctx,
+			       void *crypt_ctx);
+
 extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
 				      le128 *iv, common_glue_func_t fn);
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 3d31181..87a09c7 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -253,7 +253,7 @@ config CRYPTO_SIMD
 config CRYPTO_GLUE_HELPER_X86
 	tristate
 	depends on X86
-	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
 
 config CRYPTO_ENGINE
 	tristate

^ permalink raw reply related

* [v3 PATCH 7/16] crypto: simd - Add simd skcipher helper
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch adds the simd skcipher helper which is meant to be
a replacement for ablk helper.  It replaces the underlying blkcipher
interface with skcipher, and also presents the top-level algorithm
as an skcipher.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 crypto/Kconfig                 |    4 
 crypto/Makefile                |    2 
 crypto/simd.c                  |  226 +++++++++++++++++++++++++++++++++++++++++
 include/crypto/internal/simd.h |   17 +++
 4 files changed, 249 insertions(+)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 1db2a19..3d31181 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -246,6 +246,10 @@ config CRYPTO_ABLK_HELPER
 	tristate
 	select CRYPTO_CRYPTD
 
+config CRYPTO_SIMD
+	tristate
+	select CRYPTO_CRYPTD
+
 config CRYPTO_GLUE_HELPER_X86
 	tristate
 	depends on X86
diff --git a/crypto/Makefile b/crypto/Makefile
index 82ffeee..a05590e 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -142,3 +142,5 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx/
 obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/
 obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o
 obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o
+crypto_simd-y := simd.o
+obj-$(CONFIG_CRYPTO_SIMD) += crypto_simd.o
diff --git a/crypto/simd.c b/crypto/simd.c
new file mode 100644
index 0000000..8820337
--- /dev/null
+++ b/crypto/simd.c
@@ -0,0 +1,226 @@
+/*
+ * Shared crypto simd helpers
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright (c) 2016 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * Based on aesni-intel_glue.c by:
+ *  Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <crypto/cryptd.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <asm/simd.h>
+
+struct simd_skcipher_alg {
+	const char *ialg_name;
+	struct skcipher_alg alg;
+};
+
+struct simd_skcipher_ctx {
+	struct cryptd_skcipher *cryptd_tfm;
+};
+
+static int simd_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
+				unsigned int key_len)
+{
+	struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child = &ctx->cryptd_tfm->base;
+	int err;
+
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(tfm) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, key_len);
+	crypto_skcipher_set_flags(tfm, crypto_skcipher_get_flags(child) &
+				       CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int simd_skcipher_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_request *subreq;
+	struct crypto_skcipher *child;
+
+	subreq = skcipher_request_ctx(req);
+	*subreq = *req;
+
+	if (!may_use_simd() ||
+	    (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm)))
+		child = &ctx->cryptd_tfm->base;
+	else
+		child = cryptd_skcipher_child(ctx->cryptd_tfm);
+
+	skcipher_request_set_tfm(subreq, child);
+
+	return crypto_skcipher_encrypt(subreq);
+}
+
+static int simd_skcipher_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_request *subreq;
+	struct crypto_skcipher *child;
+
+	subreq = skcipher_request_ctx(req);
+	*subreq = *req;
+
+	if (!may_use_simd() ||
+	    (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm)))
+		child = &ctx->cryptd_tfm->base;
+	else
+		child = cryptd_skcipher_child(ctx->cryptd_tfm);
+
+	skcipher_request_set_tfm(subreq, child);
+
+	return crypto_skcipher_decrypt(subreq);
+}
+
+static void simd_skcipher_exit(struct crypto_skcipher *tfm)
+{
+	struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	cryptd_free_skcipher(ctx->cryptd_tfm);
+}
+
+static int simd_skcipher_init(struct crypto_skcipher *tfm)
+{
+	struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct cryptd_skcipher *cryptd_tfm;
+	struct simd_skcipher_alg *salg;
+	struct skcipher_alg *alg;
+	unsigned reqsize;
+
+	alg = crypto_skcipher_alg(tfm);
+	salg = container_of(alg, struct simd_skcipher_alg, alg);
+
+	cryptd_tfm = cryptd_alloc_skcipher(salg->ialg_name,
+					   CRYPTO_ALG_INTERNAL,
+					   CRYPTO_ALG_INTERNAL);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	ctx->cryptd_tfm = cryptd_tfm;
+
+	reqsize = sizeof(struct skcipher_request);
+	reqsize += crypto_skcipher_reqsize(&cryptd_tfm->base);
+
+	crypto_skcipher_set_reqsize(tfm, reqsize);
+
+	return 0;
+}
+
+struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname,
+						      const char *drvname,
+						      const char *basename)
+{
+	struct simd_skcipher_alg *salg;
+	struct crypto_skcipher *tfm;
+	struct skcipher_alg *ialg;
+	struct skcipher_alg *alg;
+	int err;
+
+	tfm = crypto_alloc_skcipher(basename, CRYPTO_ALG_INTERNAL,
+				    CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm))
+		return ERR_CAST(tfm);
+
+	ialg = crypto_skcipher_alg(tfm);
+
+	salg = kzalloc(sizeof(*salg), GFP_KERNEL);
+	if (!salg) {
+		salg = ERR_PTR(-ENOMEM);
+		goto out_put_tfm;
+	}
+
+	salg->ialg_name = basename;
+	alg = &salg->alg;
+
+	err = -ENAMETOOLONG;
+	if (snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", algname) >=
+	    CRYPTO_MAX_ALG_NAME)
+		goto out_free_salg;
+
+	if (snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		     drvname) >= CRYPTO_MAX_ALG_NAME)
+		goto out_free_salg;
+
+	alg->base.cra_flags = CRYPTO_ALG_ASYNC;
+	alg->base.cra_priority = ialg->base.cra_priority;
+	alg->base.cra_blocksize = ialg->base.cra_blocksize;
+	alg->base.cra_alignmask = ialg->base.cra_alignmask;
+	alg->base.cra_module = ialg->base.cra_module;
+	alg->base.cra_ctxsize = sizeof(struct simd_skcipher_ctx);
+
+	alg->ivsize = ialg->ivsize;
+	alg->chunksize = ialg->chunksize;
+	alg->min_keysize = ialg->min_keysize;
+	alg->max_keysize = ialg->max_keysize;
+
+	alg->init = simd_skcipher_init;
+	alg->exit = simd_skcipher_exit;
+
+	alg->setkey = simd_skcipher_setkey;
+	alg->encrypt = simd_skcipher_encrypt;
+	alg->decrypt = simd_skcipher_decrypt;
+
+	err = crypto_register_skcipher(alg);
+	if (err)
+		goto out_free_salg;
+
+out_put_tfm:
+	crypto_free_skcipher(tfm);
+	return salg;
+
+out_free_salg:
+	kfree(salg);
+	salg = ERR_PTR(err);
+	goto out_put_tfm;
+}
+EXPORT_SYMBOL_GPL(simd_skcipher_create_compat);
+
+struct simd_skcipher_alg *simd_skcipher_create(const char *algname,
+					       const char *basename)
+{
+	char drvname[CRYPTO_MAX_ALG_NAME];
+
+	if (snprintf(drvname, CRYPTO_MAX_ALG_NAME, "simd-%s", basename) >=
+	    CRYPTO_MAX_ALG_NAME)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	return simd_skcipher_create_compat(algname, drvname, basename);
+}
+EXPORT_SYMBOL_GPL(simd_skcipher_create);
+
+void simd_skcipher_free(struct simd_skcipher_alg *salg)
+{
+	crypto_unregister_skcipher(&salg->alg);
+	kfree(salg);
+}
+EXPORT_SYMBOL_GPL(simd_skcipher_free);
+
+MODULE_LICENSE("GPL");
diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h
new file mode 100644
index 0000000..4295099
--- /dev/null
+++ b/include/crypto/internal/simd.h
@@ -0,0 +1,17 @@
+/*
+ * Shared crypto simd helpers
+ */
+
+#ifndef _CRYPTO_INTERNAL_SIMD_H
+#define _CRYPTO_INTERNAL_SIMD_H
+
+struct simd_skcipher_alg;
+
+struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname,
+						      const char *drvname,
+						      const char *basename);
+struct simd_skcipher_alg *simd_skcipher_create(const char *algname,
+					       const char *basename);
+void simd_skcipher_free(struct simd_skcipher_alg *alg);
+
+#endif /* _CRYPTO_INTERNAL_SIMD_H */

^ permalink raw reply related

* [v3 PATCH 6/16] crypto: cryptd - Add support for skcipher
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch adds skcipher support to cryptd alongside ablkcipher.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 crypto/cryptd.c         |  284 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/crypto/cryptd.h |   13 ++
 2 files changed, 294 insertions(+), 3 deletions(-)

diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 3fd2a20..0508c48 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -17,9 +17,9 @@
  *
  */
 
-#include <crypto/algapi.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/cryptd.h>
 #include <crypto/crypto_wq.h>
 #include <linux/atomic.h>
@@ -48,6 +48,11 @@ struct cryptd_instance_ctx {
 	struct cryptd_queue *queue;
 };
 
+struct skcipherd_instance_ctx {
+	struct crypto_skcipher_spawn spawn;
+	struct cryptd_queue *queue;
+};
+
 struct hashd_instance_ctx {
 	struct crypto_shash_spawn spawn;
 	struct cryptd_queue *queue;
@@ -67,6 +72,15 @@ struct cryptd_blkcipher_request_ctx {
 	crypto_completion_t complete;
 };
 
+struct cryptd_skcipher_ctx {
+	atomic_t refcnt;
+	struct crypto_skcipher *child;
+};
+
+struct cryptd_skcipher_request_ctx {
+	crypto_completion_t complete;
+};
+
 struct cryptd_hash_ctx {
 	atomic_t refcnt;
 	struct crypto_shash *child;
@@ -430,6 +444,216 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl,
 	return err;
 }
 
+static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
+				  const u8 *key, unsigned int keylen)
+{
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_skcipher *child = ctx->child;
+	int err;
+
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static void cryptd_skcipher_complete(struct skcipher_request *req, int err)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+	int refcnt = atomic_read(&ctx->refcnt);
+
+	local_bh_disable();
+	rctx->complete(&req->base, err);
+	local_bh_enable();
+
+	if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt))
+		crypto_free_skcipher(tfm);
+}
+
+static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
+				    int err)
+{
+	struct skcipher_request *req = skcipher_request_cast(base);
+	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child = ctx->child;
+	SKCIPHER_REQUEST_ON_STACK(subreq, child);
+
+	if (unlikely(err == -EINPROGRESS))
+		goto out;
+
+	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
+				      NULL, NULL);
+	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+				   req->iv);
+
+	err = crypto_skcipher_encrypt(subreq);
+	skcipher_request_zero(subreq);
+
+	req->base.complete = rctx->complete;
+
+out:
+	cryptd_skcipher_complete(req, err);
+}
+
+static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
+				    int err)
+{
+	struct skcipher_request *req = skcipher_request_cast(base);
+	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child = ctx->child;
+	SKCIPHER_REQUEST_ON_STACK(subreq, child);
+
+	if (unlikely(err == -EINPROGRESS))
+		goto out;
+
+	skcipher_request_set_tfm(subreq, child);
+	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
+				      NULL, NULL);
+	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
+				   req->iv);
+
+	err = crypto_skcipher_decrypt(subreq);
+	skcipher_request_zero(subreq);
+
+	req->base.complete = rctx->complete;
+
+out:
+	cryptd_skcipher_complete(req, err);
+}
+
+static int cryptd_skcipher_enqueue(struct skcipher_request *req,
+				   crypto_completion_t compl)
+{
+	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct cryptd_queue *queue;
+
+	queue = cryptd_get_queue(crypto_skcipher_tfm(tfm));
+	rctx->complete = req->base.complete;
+	req->base.complete = compl;
+
+	return cryptd_enqueue_request(queue, &req->base);
+}
+
+static int cryptd_skcipher_encrypt_enqueue(struct skcipher_request *req)
+{
+	return cryptd_skcipher_enqueue(req, cryptd_skcipher_encrypt);
+}
+
+static int cryptd_skcipher_decrypt_enqueue(struct skcipher_request *req)
+{
+	return cryptd_skcipher_enqueue(req, cryptd_skcipher_decrypt);
+}
+
+static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct skcipherd_instance_ctx *ictx = skcipher_instance_ctx(inst);
+	struct crypto_skcipher_spawn *spawn = &ictx->spawn;
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *cipher;
+
+	cipher = crypto_spawn_skcipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+	crypto_skcipher_set_reqsize(
+		tfm, sizeof(struct cryptd_skcipher_request_ctx));
+	return 0;
+}
+
+static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(ctx->child);
+}
+
+static void cryptd_skcipher_free(struct skcipher_instance *inst)
+{
+	struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst);
+
+	crypto_drop_skcipher(&ctx->spawn);
+}
+
+static int cryptd_create_skcipher(struct crypto_template *tmpl,
+				  struct rtattr **tb,
+				  struct cryptd_queue *queue)
+{
+	struct skcipherd_instance_ctx *ctx;
+	struct skcipher_instance *inst;
+	struct skcipher_alg *alg;
+	const char *name;
+	u32 type;
+	u32 mask;
+	int err;
+
+	type = 0;
+	mask = CRYPTO_ALG_ASYNC;
+
+	cryptd_check_internal(tb, &type, &mask);
+
+	name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(name))
+		return PTR_ERR(name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	ctx = skcipher_instance_ctx(inst);
+	ctx->queue = queue;
+
+	crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst));
+	err = crypto_grab_skcipher(&ctx->spawn, name, type, mask);
+	if (err)
+		goto out_free_inst;
+
+	alg = crypto_spawn_skcipher_alg(&ctx->spawn);
+	err = cryptd_init_instance(skcipher_crypto_instance(inst), &alg->base);
+	if (err)
+		goto out_drop_skcipher;
+
+	inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC |
+				   (alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
+
+	inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
+	inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg);
+	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
+	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
+
+	inst->alg.base.cra_ctxsize = sizeof(struct cryptd_skcipher_ctx);
+
+	inst->alg.init = cryptd_skcipher_init_tfm;
+	inst->alg.exit = cryptd_skcipher_exit_tfm;
+
+	inst->alg.setkey = cryptd_skcipher_setkey;
+	inst->alg.encrypt = cryptd_skcipher_encrypt_enqueue;
+	inst->alg.decrypt = cryptd_skcipher_decrypt_enqueue;
+
+	inst->free = cryptd_skcipher_free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err) {
+out_drop_skcipher:
+		crypto_drop_skcipher(&ctx->spawn);
+out_free_inst:
+		kfree(inst);
+	}
+	return err;
+}
+
 static int cryptd_hash_init_tfm(struct crypto_tfm *tfm)
 {
 	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
@@ -893,7 +1117,11 @@ static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
 
 	switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
 	case CRYPTO_ALG_TYPE_BLKCIPHER:
-		return cryptd_create_blkcipher(tmpl, tb, &queue);
+		if ((algt->type & CRYPTO_ALG_TYPE_MASK) ==
+		    CRYPTO_ALG_TYPE_BLKCIPHER)
+			return cryptd_create_blkcipher(tmpl, tb, &queue);
+
+		return cryptd_create_skcipher(tmpl, tb, &queue);
 	case CRYPTO_ALG_TYPE_DIGEST:
 		return cryptd_create_hash(tmpl, tb, &queue);
 	case CRYPTO_ALG_TYPE_AEAD:
@@ -983,6 +1211,58 @@ void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm)
 }
 EXPORT_SYMBOL_GPL(cryptd_free_ablkcipher);
 
+struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name,
+					      u32 type, u32 mask)
+{
+	char cryptd_alg_name[CRYPTO_MAX_ALG_NAME];
+	struct cryptd_skcipher_ctx *ctx;
+	struct crypto_skcipher *tfm;
+
+	if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME,
+		     "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
+		return ERR_PTR(-EINVAL);
+
+	tfm = crypto_alloc_skcipher(cryptd_alg_name, type, mask);
+	if (IS_ERR(tfm))
+		return ERR_CAST(tfm);
+
+	if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
+		crypto_free_skcipher(tfm);
+		return ERR_PTR(-EINVAL);
+	}
+
+	ctx = crypto_skcipher_ctx(tfm);
+	atomic_set(&ctx->refcnt, 1);
+
+	return container_of(tfm, struct cryptd_skcipher, base);
+}
+EXPORT_SYMBOL_GPL(cryptd_alloc_skcipher);
+
+struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
+{
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
+
+	return ctx->child;
+}
+EXPORT_SYMBOL_GPL(cryptd_skcipher_child);
+
+bool cryptd_skcipher_queued(struct cryptd_skcipher *tfm)
+{
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
+
+	return atomic_read(&ctx->refcnt) - 1;
+}
+EXPORT_SYMBOL_GPL(cryptd_skcipher_queued);
+
+void cryptd_free_skcipher(struct cryptd_skcipher *tfm)
+{
+	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
+
+	if (atomic_dec_and_test(&ctx->refcnt))
+		crypto_free_skcipher(&tfm->base);
+}
+EXPORT_SYMBOL_GPL(cryptd_free_skcipher);
+
 struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name,
 					u32 type, u32 mask)
 {
diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h
index bc792d5..94418cb 100644
--- a/include/crypto/cryptd.h
+++ b/include/crypto/cryptd.h
@@ -12,10 +12,10 @@
 #ifndef _CRYPTO_CRYPT_H
 #define _CRYPTO_CRYPT_H
 
-#include <linux/crypto.h>
 #include <linux/kernel.h>
 #include <crypto/aead.h>
 #include <crypto/hash.h>
+#include <crypto/skcipher.h>
 
 struct cryptd_ablkcipher {
 	struct crypto_ablkcipher base;
@@ -34,6 +34,17 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name,
 bool cryptd_ablkcipher_queued(struct cryptd_ablkcipher *tfm);
 void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm);
 
+struct cryptd_skcipher {
+	struct crypto_skcipher base;
+};
+
+struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name,
+					      u32 type, u32 mask);
+struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm);
+/* Must be called without moving CPUs. */
+bool cryptd_skcipher_queued(struct cryptd_skcipher *tfm);
+void cryptd_free_skcipher(struct cryptd_skcipher *tfm);
+
 struct cryptd_ahash {
 	struct crypto_ahash base;
 };

^ permalink raw reply related

* [v3 PATCH 5/16] crypto: api - Do not clear type bits in crypto_larval_lookup
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

Currently all bits not set in mask are cleared in crypto_larval_lookup.
This is unnecessary as wherever the type bits are used it is always
masked anyway.

This patch removes the clearing so that we may use bits set in the
type but not in the mask for special purposes, e.g., picking up
internal algorithms.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 crypto/api.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/api.c b/crypto/api.c
index a88729f..b16ce16 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -211,8 +211,8 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
 	if (!name)
 		return ERR_PTR(-ENOENT);
 
+	type &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD);
 	mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD);
-	type &= mask;
 
 	alg = crypto_alg_lookup(name, type, mask);
 	if (!alg) {

^ permalink raw reply related

* [v3 PATCH 4/16] crypto: xts - Convert to skcipher
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch converts xts over to the skcipher interface.  It also
optimises the implementation to be based on ECB instead of the
underlying cipher.  For compatibility the existing naming scheme
of xts(aes) is maintained as opposed to the more obvious one of
xts(ecb(aes)).

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 crypto/xts.c         |  547 ++++++++++++++++++++++++++++++++++++---------------
 include/crypto/xts.h |   26 ++
 2 files changed, 416 insertions(+), 157 deletions(-)

diff --git a/crypto/xts.c b/crypto/xts.c
index 305343f..410a2e2 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -13,7 +13,8 @@
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version.
  */
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -25,140 +26,320 @@
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 
+#define XTS_BUFFER_SIZE 128u
+
 struct priv {
-	struct crypto_cipher *child;
+	struct crypto_skcipher *child;
 	struct crypto_cipher *tweak;
 };
 
-static int setkey(struct crypto_tfm *parent, const u8 *key,
+struct xts_instance_ctx {
+	struct crypto_skcipher_spawn spawn;
+	char name[CRYPTO_MAX_ALG_NAME];
+};
+
+struct rctx {
+	be128 buf[XTS_BUFFER_SIZE / sizeof(be128)];
+
+	be128 t;
+
+	be128 *ext;
+
+	struct scatterlist srcbuf[2];
+	struct scatterlist dstbuf[2];
+	struct scatterlist *src;
+	struct scatterlist *dst;
+
+	unsigned int left;
+
+	struct skcipher_request subreq;
+};
+
+static int setkey(struct crypto_skcipher *parent, const u8 *key,
 		  unsigned int keylen)
 {
-	struct priv *ctx = crypto_tfm_ctx(parent);
-	struct crypto_cipher *child = ctx->tweak;
+	struct priv *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_skcipher *child;
+	struct crypto_cipher *tweak;
 	int err;
 
-	err = xts_check_key(parent, key, keylen);
+	err = xts_verify_key(parent, key, keylen);
 	if (err)
 		return err;
 
+	keylen /= 2;
+
 	/* we need two cipher instances: one to compute the initial 'tweak'
 	 * by encrypting the IV (usually the 'plain' iv) and the other
 	 * one to encrypt and decrypt the data */
 
 	/* tweak cipher, uses Key2 i.e. the second half of *key */
-	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
+	tweak = ctx->tweak;
+	crypto_cipher_clear_flags(tweak, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(tweak, crypto_skcipher_get_flags(parent) &
 				       CRYPTO_TFM_REQ_MASK);
-	err = crypto_cipher_setkey(child, key + keylen/2, keylen/2);
+	err = crypto_cipher_setkey(tweak, key + keylen, keylen);
+	crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(tweak) &
+					  CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
 
-	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-				     CRYPTO_TFM_RES_MASK);
-
+	/* data cipher, uses Key1 i.e. the first half of *key */
 	child = ctx->child;
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, keylen);
+	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
 
-	/* data cipher, uses Key1 i.e. the first half of *key */
-	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
-				       CRYPTO_TFM_REQ_MASK);
-	err = crypto_cipher_setkey(child, key, keylen/2);
-	if (err)
-		return err;
+	return err;
+}
 
-	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-				     CRYPTO_TFM_RES_MASK);
+static int post_crypt(struct skcipher_request *req)
+{
+	struct rctx *rctx = skcipher_request_ctx(req);
+	be128 *buf = rctx->ext ?: rctx->buf;
+	struct skcipher_request *subreq;
+	const int bs = XTS_BLOCK_SIZE;
+	struct skcipher_walk w;
+	struct scatterlist *sg;
+	unsigned offset;
+	int err;
 
-	return 0;
-}
+	subreq = &rctx->subreq;
+	err = skcipher_walk_virt(&w, subreq, false);
 
-struct sinfo {
-	be128 *t;
-	struct crypto_tfm *tfm;
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *);
-};
+	while (w.nbytes) {
+		unsigned int avail = w.nbytes;
+		be128 *wdst;
 
-static inline void xts_round(struct sinfo *s, void *dst, const void *src)
-{
-	be128_xor(dst, s->t, src);		/* PP <- T xor P */
-	s->fn(s->tfm, dst, dst);		/* CC <- E(Key1,PP) */
-	be128_xor(dst, dst, s->t);		/* C <- T xor CC */
+		wdst = w.dst.virt.addr;
+
+		do {
+			be128_xor(wdst, buf++, wdst);
+			wdst++;
+		} while ((avail -= bs) >= bs);
+
+		err = skcipher_walk_done(&w, avail);
+	}
+
+	rctx->left -= subreq->cryptlen;
+
+	if (err || !rctx->left)
+		goto out;
+
+	rctx->dst = rctx->dstbuf;
+
+	scatterwalk_done(&w.out, 0, 1);
+	sg = w.out.sg;
+	offset = w.out.offset;
+
+	if (rctx->dst != sg) {
+		rctx->dst[0] = *sg;
+		sg_unmark_end(rctx->dst);
+		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2);
+	}
+	rctx->dst[0].length -= offset - sg->offset;
+	rctx->dst[0].offset = offset;
+
+out:
+	return err;
 }
 
-static int crypt(struct blkcipher_desc *d,
-		 struct blkcipher_walk *w, struct priv *ctx,
-		 void (*tw)(struct crypto_tfm *, u8 *, const u8 *),
-		 void (*fn)(struct crypto_tfm *, u8 *, const u8 *))
+static int pre_crypt(struct skcipher_request *req)
 {
-	int err;
-	unsigned int avail;
+	struct rctx *rctx = skcipher_request_ctx(req);
+	be128 *buf = rctx->ext ?: rctx->buf;
+	struct skcipher_request *subreq;
 	const int bs = XTS_BLOCK_SIZE;
-	struct sinfo s = {
-		.tfm = crypto_cipher_tfm(ctx->child),
-		.fn = fn
-	};
-	u8 *wsrc;
-	u8 *wdst;
-
-	err = blkcipher_walk_virt(d, w);
-	if (!w->nbytes)
-		return err;
+	struct skcipher_walk w;
+	struct scatterlist *sg;
+	unsigned cryptlen;
+	unsigned offset;
+	bool more;
+	int err;
 
-	s.t = (be128 *)w->iv;
-	avail = w->nbytes;
+	subreq = &rctx->subreq;
+	cryptlen = subreq->cryptlen;
 
-	wsrc = w->src.virt.addr;
-	wdst = w->dst.virt.addr;
+	more = rctx->left > cryptlen;
+	if (!more)
+		cryptlen = rctx->left;
 
-	/* calculate first value of T */
-	tw(crypto_cipher_tfm(ctx->tweak), w->iv, w->iv);
+	skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
+				   cryptlen, NULL);
 
-	goto first;
+	err = skcipher_walk_virt(&w, subreq, false);
 
-	for (;;) {
-		do {
-			gf128mul_x_ble(s.t, s.t);
+	while (w.nbytes) {
+		unsigned int avail = w.nbytes;
+		be128 *wsrc;
+		be128 *wdst;
 
-first:
-			xts_round(&s, wdst, wsrc);
+		wsrc = w.src.virt.addr;
+		wdst = w.dst.virt.addr;
 
-			wsrc += bs;
-			wdst += bs;
+		do {
+			*buf++ = rctx->t;
+			be128_xor(wdst++, &rctx->t, wsrc++);
+			gf128mul_x_ble(&rctx->t, &rctx->t);
 		} while ((avail -= bs) >= bs);
 
-		err = blkcipher_walk_done(d, w, avail);
-		if (!w->nbytes)
-			break;
+		err = skcipher_walk_done(&w, avail);
+	}
+
+	skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
+				   cryptlen, NULL);
 
-		avail = w->nbytes;
+	if (err || !more)
+		goto out;
 
-		wsrc = w->src.virt.addr;
-		wdst = w->dst.virt.addr;
+	rctx->src = rctx->srcbuf;
+
+	scatterwalk_done(&w.in, 0, 1);
+	sg = w.in.sg;
+	offset = w.in.offset;
+
+	if (rctx->src != sg) {
+		rctx->src[0] = *sg;
+		sg_unmark_end(rctx->src);
+		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2);
 	}
+	rctx->src[0].length -= offset - sg->offset;
+	rctx->src[0].offset = offset;
 
+out:
 	return err;
 }
 
-static int encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		   struct scatterlist *src, unsigned int nbytes)
+static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
 {
-	struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk w;
+	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+	gfp_t gfp;
+
+	subreq = &rctx->subreq;
+	skcipher_request_set_tfm(subreq, ctx->child);
+	skcipher_request_set_callback(subreq, req->base.flags, done, req);
+
+	gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+							   GFP_ATOMIC;
+	rctx->ext = NULL;
+
+	subreq->cryptlen = XTS_BUFFER_SIZE;
+	if (req->cryptlen > XTS_BUFFER_SIZE) {
+		subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
+		rctx->ext = kmalloc(subreq->cryptlen, gfp);
+	}
+
+	rctx->src = req->src;
+	rctx->dst = req->dst;
+	rctx->left = req->cryptlen;
 
-	blkcipher_walk_init(&w, dst, src, nbytes);
-	return crypt(desc, &w, ctx, crypto_cipher_alg(ctx->tweak)->cia_encrypt,
-		     crypto_cipher_alg(ctx->child)->cia_encrypt);
+	/* calculate first value of T */
+	crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv);
+
+	return 0;
 }
 
-static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		   struct scatterlist *src, unsigned int nbytes)
+static void exit_crypt(struct skcipher_request *req)
 {
-	struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk w;
+	struct rctx *rctx = skcipher_request_ctx(req);
+
+	rctx->left = 0;
 
-	blkcipher_walk_init(&w, dst, src, nbytes);
-	return crypt(desc, &w, ctx, crypto_cipher_alg(ctx->tweak)->cia_encrypt,
-		     crypto_cipher_alg(ctx->child)->cia_decrypt);
+	if (rctx->ext)
+		kzfree(rctx->ext);
+}
+
+static int do_encrypt(struct skcipher_request *req, int err)
+{
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+
+	subreq = &rctx->subreq;
+
+	while (!err && rctx->left) {
+		err = pre_crypt(req) ?:
+		      crypto_skcipher_encrypt(subreq) ?:
+		      post_crypt(req);
+
+		if (err == -EINPROGRESS ||
+		    (err == -EBUSY &&
+		     req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return err;
+	}
+
+	exit_crypt(req);
+	return err;
+}
+
+static void encrypt_done(struct crypto_async_request *areq, int err)
+{
+	struct skcipher_request *req = areq->data;
+	struct skcipher_request *subreq;
+	struct rctx *rctx;
+
+	rctx = skcipher_request_ctx(req);
+	subreq = &rctx->subreq;
+	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+	err = do_encrypt(req, err ?: post_crypt(req));
+	if (rctx->left)
+		return;
+
+	skcipher_request_complete(req, err);
+}
+
+static int encrypt(struct skcipher_request *req)
+{
+	return do_encrypt(req, init_crypt(req, encrypt_done));
+}
+
+static int do_decrypt(struct skcipher_request *req, int err)
+{
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+
+	subreq = &rctx->subreq;
+
+	while (!err && rctx->left) {
+		err = pre_crypt(req) ?:
+		      crypto_skcipher_decrypt(subreq) ?:
+		      post_crypt(req);
+
+		if (err == -EINPROGRESS ||
+		    (err == -EBUSY &&
+		     req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return err;
+	}
+
+	exit_crypt(req);
+	return err;
+}
+
+static void decrypt_done(struct crypto_async_request *areq, int err)
+{
+	struct skcipher_request *req = areq->data;
+	struct skcipher_request *subreq;
+	struct rctx *rctx;
+
+	rctx = skcipher_request_ctx(req);
+	subreq = &rctx->subreq;
+	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+	err = do_decrypt(req, err ?: post_crypt(req));
+	if (rctx->left)
+		return;
+
+	skcipher_request_complete(req, err);
+}
+
+static int decrypt(struct skcipher_request *req)
+{
+	return do_decrypt(req, init_crypt(req, decrypt_done));
 }
 
 int xts_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
@@ -233,112 +414,168 @@ int xts_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
 }
 EXPORT_SYMBOL_GPL(xts_crypt);
 
-static int init_tfm(struct crypto_tfm *tfm)
+static int init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_cipher *cipher;
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct priv *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-
-	cipher = crypto_spawn_cipher(spawn);
-	if (IS_ERR(cipher))
-		return PTR_ERR(cipher);
-
-	if (crypto_cipher_blocksize(cipher) != XTS_BLOCK_SIZE) {
-		*flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
-		crypto_free_cipher(cipher);
-		return -EINVAL;
-	}
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst);
+	struct priv *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *child;
+	struct crypto_cipher *tweak;
 
-	ctx->child = cipher;
+	child = crypto_spawn_skcipher(&ictx->spawn);
+	if (IS_ERR(child))
+		return PTR_ERR(child);
 
-	cipher = crypto_spawn_cipher(spawn);
-	if (IS_ERR(cipher)) {
-		crypto_free_cipher(ctx->child);
-		return PTR_ERR(cipher);
-	}
+	ctx->child = child;
 
-	/* this check isn't really needed, leave it here just in case */
-	if (crypto_cipher_blocksize(cipher) != XTS_BLOCK_SIZE) {
-		crypto_free_cipher(cipher);
-		crypto_free_cipher(ctx->child);
-		*flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
-		return -EINVAL;
+	tweak = crypto_alloc_cipher(ictx->name, 0, 0);
+	if (IS_ERR(tweak)) {
+		crypto_free_skcipher(ctx->child);
+		return PTR_ERR(tweak);
 	}
 
-	ctx->tweak = cipher;
+	ctx->tweak = tweak;
+
+	crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(child) +
+					 sizeof(struct rctx));
 
 	return 0;
 }
 
-static void exit_tfm(struct crypto_tfm *tfm)
+static void exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct priv *ctx = crypto_tfm_ctx(tfm);
-	crypto_free_cipher(ctx->child);
+	struct priv *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_skcipher(ctx->child);
 	crypto_free_cipher(ctx->tweak);
 }
 
-static struct crypto_instance *alloc(struct rtattr **tb)
+static void free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+static int create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct xts_instance_ctx *ctx;
+	struct skcipher_alg *alg;
+	const char *cipher_name;
 	int err;
 
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return PTR_ERR(algt);
+
+	if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
+		return -EINVAL;
+
+	cipher_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(cipher_name))
+		return PTR_ERR(cipher_name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	ctx = skcipher_instance_ctx(inst);
+
+	crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst));
+	err = crypto_grab_skcipher(&ctx->spawn, cipher_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
+	if (err == -ENOENT) {
+		err = -ENAMETOOLONG;
+		if (snprintf(ctx->name, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
+			     cipher_name) >= CRYPTO_MAX_ALG_NAME)
+			goto err_free_inst;
+
+		err = crypto_grab_skcipher(&ctx->spawn, ctx->name, 0,
+					   crypto_requires_sync(algt->type,
+								algt->mask));
+	}
+
 	if (err)
-		return ERR_PTR(err);
+		goto err_free_inst;
 
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return ERR_CAST(alg);
+	alg = crypto_skcipher_spawn_alg(&ctx->spawn);
 
-	inst = crypto_alloc_instance("xts", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	err = -EINVAL;
+	if (alg->base.cra_blocksize != XTS_BLOCK_SIZE)
+		goto err_drop_spawn;
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
+	if (crypto_skcipher_alg_ivsize(alg))
+		goto err_drop_spawn;
 
-	if (alg->cra_alignmask < 7)
-		inst->alg.cra_alignmask = 7;
-	else
-		inst->alg.cra_alignmask = alg->cra_alignmask;
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "xts",
+				  &alg->base);
+	if (err)
+		goto err_drop_spawn;
 
-	inst->alg.cra_type = &crypto_blkcipher_type;
+	err = -EINVAL;
+	cipher_name = alg->base.cra_name;
 
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-	inst->alg.cra_blkcipher.min_keysize =
-		2 * alg->cra_cipher.cia_min_keysize;
-	inst->alg.cra_blkcipher.max_keysize =
-		2 * alg->cra_cipher.cia_max_keysize;
+	/* Alas we screwed up the naming so we have to mangle the
+	 * cipher name.
+	 */
+	if (!strncmp(cipher_name, "ecb(", 4)) {
+		unsigned len;
 
-	inst->alg.cra_ctxsize = sizeof(struct priv);
+		len = strlcpy(ctx->name, cipher_name + 4, sizeof(ctx->name));
+		if (len < 2 || len >= sizeof(ctx->name))
+			goto err_drop_spawn;
 
-	inst->alg.cra_init = init_tfm;
-	inst->alg.cra_exit = exit_tfm;
+		if (ctx->name[len - 1] != ')')
+			goto err_drop_spawn;
 
-	inst->alg.cra_blkcipher.setkey = setkey;
-	inst->alg.cra_blkcipher.encrypt = encrypt;
-	inst->alg.cra_blkcipher.decrypt = decrypt;
+		ctx->name[len - 1] = 0;
 
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
+		if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+			     "xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME)
+			return -ENAMETOOLONG;
+	} else
+		goto err_drop_spawn;
 
-static void free(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
+	inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_priority = alg->base.cra_priority;
+	inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE;
+	inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
+				       (__alignof__(u64) - 1);
+
+	inst->alg.ivsize = XTS_BLOCK_SIZE;
+	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) * 2;
+	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) * 2;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct priv);
+
+	inst->alg.init = init_tfm;
+	inst->alg.exit = exit_tfm;
+
+	inst->alg.setkey = setkey;
+	inst->alg.encrypt = encrypt;
+	inst->alg.decrypt = decrypt;
+
+	inst->free = free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_skcipher(&ctx->spawn);
+err_free_inst:
 	kfree(inst);
+	goto out;
 }
 
 static struct crypto_template crypto_tmpl = {
 	.name = "xts",
-	.alloc = alloc,
-	.free = free,
+	.create = create,
 	.module = THIS_MODULE,
 };
 
diff --git a/include/crypto/xts.h b/include/crypto/xts.h
index ede6b97..77b6306 100644
--- a/include/crypto/xts.h
+++ b/include/crypto/xts.h
@@ -2,8 +2,7 @@
 #define _CRYPTO_XTS_H
 
 #include <crypto/b128ops.h>
-#include <linux/crypto.h>
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/fips.h>
 
 struct scatterlist;
@@ -51,4 +50,27 @@ static inline int xts_check_key(struct crypto_tfm *tfm,
 	return 0;
 }
 
+static inline int xts_verify_key(struct crypto_skcipher *tfm,
+				 const u8 *key, unsigned int keylen)
+{
+	/*
+	 * key consists of keys of equal size concatenated, therefore
+	 * the length must be even.
+	 */
+	if (keylen % 2) {
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	/* ensure that the AES and tweak key are not identical */
+	if ((fips_enabled || crypto_skcipher_get_flags(tfm) &
+			     CRYPTO_TFM_REQ_WEAK_KEY) &&
+	    !crypto_memneq(key, key + (keylen / 2), keylen / 2)) {
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 #endif  /* _CRYPTO_XTS_H */

^ permalink raw reply related

* [v3 PATCH 3/16] crypto: lrw - Convert to skcipher
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch converts lrw over to the skcipher interface.  It also
optimises the implementation to be based on ECB instead of the
underlying cipher.  For compatibility the existing naming scheme
of lrw(aes) is maintained as opposed to the more obvious one of
lrw(ecb(aes)).

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 crypto/lrw.c |  507 ++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 380 insertions(+), 127 deletions(-)

diff --git a/crypto/lrw.c b/crypto/lrw.c
index 6f9908a..ecd8474 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -17,7 +17,8 @@
  *
  * The test vectors are included in the testing module tcrypt.[ch] */
 
-#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -29,11 +30,30 @@
 #include <crypto/gf128mul.h>
 #include <crypto/lrw.h>
 
+#define LRW_BUFFER_SIZE 128u
+
 struct priv {
-	struct crypto_cipher *child;
+	struct crypto_skcipher *child;
 	struct lrw_table_ctx table;
 };
 
+struct rctx {
+	be128 buf[LRW_BUFFER_SIZE / sizeof(be128)];
+
+	be128 t;
+
+	be128 *ext;
+
+	struct scatterlist srcbuf[2];
+	struct scatterlist dstbuf[2];
+	struct scatterlist *src;
+	struct scatterlist *dst;
+
+	unsigned int left;
+
+	struct skcipher_request subreq;
+};
+
 static inline void setbit128_bbe(void *b, int bit)
 {
 	__set_bit(bit ^ (0x80 -
@@ -76,32 +96,26 @@ void lrw_free_table(struct lrw_table_ctx *ctx)
 }
 EXPORT_SYMBOL_GPL(lrw_free_table);
 
-static int setkey(struct crypto_tfm *parent, const u8 *key,
+static int setkey(struct crypto_skcipher *parent, const u8 *key,
 		  unsigned int keylen)
 {
-	struct priv *ctx = crypto_tfm_ctx(parent);
-	struct crypto_cipher *child = ctx->child;
+	struct priv *ctx = crypto_skcipher_ctx(parent);
+	struct crypto_skcipher *child = ctx->child;
 	int err, bsize = LRW_BLOCK_SIZE;
 	const u8 *tweak = key + keylen - bsize;
 
-	crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) &
-				       CRYPTO_TFM_REQ_MASK);
-	err = crypto_cipher_setkey(child, key, keylen - bsize);
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
+					 CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(child, key, keylen - bsize);
+	crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
+					  CRYPTO_TFM_RES_MASK);
 	if (err)
 		return err;
-	crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) &
-				     CRYPTO_TFM_RES_MASK);
 
 	return lrw_init_table(&ctx->table, tweak);
 }
 
-struct sinfo {
-	be128 t;
-	struct crypto_tfm *tfm;
-	void (*fn)(struct crypto_tfm *, u8 *, const u8 *);
-};
-
 static inline void inc(be128 *iv)
 {
 	be64_add_cpu(&iv->b, 1);
@@ -109,13 +123,6 @@ static inline void inc(be128 *iv)
 		be64_add_cpu(&iv->a, 1);
 }
 
-static inline void lrw_round(struct sinfo *s, void *dst, const void *src)
-{
-	be128_xor(dst, &s->t, src);		/* PP <- T xor P */
-	s->fn(s->tfm, dst, dst);		/* CC <- E(Key2,PP) */
-	be128_xor(dst, dst, &s->t);		/* C <- T xor CC */
-}
-
 /* this returns the number of consequative 1 bits starting
  * from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */
 static inline int get_index128(be128 *block)
@@ -135,83 +142,263 @@ static inline int get_index128(be128 *block)
 	return x;
 }
 
-static int crypt(struct blkcipher_desc *d,
-		 struct blkcipher_walk *w, struct priv *ctx,
-		 void (*fn)(struct crypto_tfm *, u8 *, const u8 *))
+static int post_crypt(struct skcipher_request *req)
 {
+	struct rctx *rctx = skcipher_request_ctx(req);
+	be128 *buf = rctx->ext ?: rctx->buf;
+	struct skcipher_request *subreq;
+	const int bs = LRW_BLOCK_SIZE;
+	struct skcipher_walk w;
+	struct scatterlist *sg;
+	unsigned offset;
 	int err;
-	unsigned int avail;
+
+	subreq = &rctx->subreq;
+	err = skcipher_walk_virt(&w, subreq, false);
+
+	while (w.nbytes) {
+		unsigned int avail = w.nbytes;
+		be128 *wdst;
+
+		wdst = w.dst.virt.addr;
+
+		do {
+			be128_xor(wdst, buf++, wdst);
+			wdst++;
+		} while ((avail -= bs) >= bs);
+
+		err = skcipher_walk_done(&w, avail);
+	}
+
+	rctx->left -= subreq->cryptlen;
+
+	if (err || !rctx->left)
+		goto out;
+
+	rctx->dst = rctx->dstbuf;
+
+	scatterwalk_done(&w.out, 0, 1);
+	sg = w.out.sg;
+	offset = w.out.offset;
+
+	if (rctx->dst != sg) {
+		rctx->dst[0] = *sg;
+		sg_unmark_end(rctx->dst);
+		scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2);
+	}
+	rctx->dst[0].length -= offset - sg->offset;
+	rctx->dst[0].offset = offset;
+
+out:
+	return err;
+}
+
+static int pre_crypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct priv *ctx = crypto_skcipher_ctx(tfm);
+	be128 *buf = rctx->ext ?: rctx->buf;
+	struct skcipher_request *subreq;
 	const int bs = LRW_BLOCK_SIZE;
-	struct sinfo s = {
-		.tfm = crypto_cipher_tfm(ctx->child),
-		.fn = fn
-	};
+	struct skcipher_walk w;
+	struct scatterlist *sg;
+	unsigned cryptlen;
+	unsigned offset;
 	be128 *iv;
-	u8 *wsrc;
-	u8 *wdst;
+	bool more;
+	int err;
 
-	err = blkcipher_walk_virt(d, w);
-	if (!(avail = w->nbytes))
-		return err;
+	subreq = &rctx->subreq;
+	skcipher_request_set_tfm(subreq, tfm);
 
-	wsrc = w->src.virt.addr;
-	wdst = w->dst.virt.addr;
+	cryptlen = subreq->cryptlen;
+	more = rctx->left > cryptlen;
+	if (!more)
+		cryptlen = rctx->left;
 
-	/* calculate first value of T */
-	iv = (be128 *)w->iv;
-	s.t = *iv;
+	skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
+				   cryptlen, req->iv);
 
-	/* T <- I*Key2 */
-	gf128mul_64k_bbe(&s.t, ctx->table.table);
+	err = skcipher_walk_virt(&w, subreq, false);
+	iv = w.iv;
 
-	goto first;
+	while (w.nbytes) {
+		unsigned int avail = w.nbytes;
+		be128 *wsrc;
+		be128 *wdst;
+
+		wsrc = w.src.virt.addr;
+		wdst = w.dst.virt.addr;
 
-	for (;;) {
 		do {
+			*buf++ = rctx->t;
+			be128_xor(wdst++, &rctx->t, wsrc++);
+
 			/* T <- I*Key2, using the optimization
 			 * discussed in the specification */
-			be128_xor(&s.t, &s.t,
+			be128_xor(&rctx->t, &rctx->t,
 				  &ctx->table.mulinc[get_index128(iv)]);
 			inc(iv);
+		} while ((avail -= bs) >= bs);
 
-first:
-			lrw_round(&s, wdst, wsrc);
+		err = skcipher_walk_done(&w, avail);
+	}
 
-			wsrc += bs;
-			wdst += bs;
-		} while ((avail -= bs) >= bs);
+	skcipher_request_set_tfm(subreq, ctx->child);
+	skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
+				   cryptlen, NULL);
 
-		err = blkcipher_walk_done(d, w, avail);
-		if (!(avail = w->nbytes))
-			break;
+	if (err || !more)
+		goto out;
+
+	rctx->src = rctx->srcbuf;
+
+	scatterwalk_done(&w.in, 0, 1);
+	sg = w.in.sg;
+	offset = w.in.offset;
+
+	if (rctx->src != sg) {
+		rctx->src[0] = *sg;
+		sg_unmark_end(rctx->src);
+		scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2);
+	}
+	rctx->src[0].length -= offset - sg->offset;
+	rctx->src[0].offset = offset;
+
+out:
+	return err;
+}
+
+static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
+{
+	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+	gfp_t gfp;
+
+	subreq = &rctx->subreq;
+	skcipher_request_set_callback(subreq, req->base.flags, done, req);
+
+	gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
+							   GFP_ATOMIC;
+	rctx->ext = NULL;
+
+	subreq->cryptlen = LRW_BUFFER_SIZE;
+	if (req->cryptlen > LRW_BUFFER_SIZE) {
+		subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
+		rctx->ext = kmalloc(subreq->cryptlen, gfp);
+	}
+
+	rctx->src = req->src;
+	rctx->dst = req->dst;
+	rctx->left = req->cryptlen;
+
+	/* calculate first value of T */
+	memcpy(&rctx->t, req->iv, sizeof(rctx->t));
+
+	/* T <- I*Key2 */
+	gf128mul_64k_bbe(&rctx->t, ctx->table.table);
 
-		wsrc = w->src.virt.addr;
-		wdst = w->dst.virt.addr;
+	return 0;
+}
+
+static void exit_crypt(struct skcipher_request *req)
+{
+	struct rctx *rctx = skcipher_request_ctx(req);
+
+	rctx->left = 0;
+
+	if (rctx->ext)
+		kfree(rctx->ext);
+}
+
+static int do_encrypt(struct skcipher_request *req, int err)
+{
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+
+	subreq = &rctx->subreq;
+
+	while (!err && rctx->left) {
+		err = pre_crypt(req) ?:
+		      crypto_skcipher_encrypt(subreq) ?:
+		      post_crypt(req);
+
+		if (err == -EINPROGRESS ||
+		    (err == -EBUSY &&
+		     req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return err;
 	}
 
+	exit_crypt(req);
 	return err;
 }
 
-static int encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		   struct scatterlist *src, unsigned int nbytes)
+static void encrypt_done(struct crypto_async_request *areq, int err)
+{
+	struct skcipher_request *req = areq->data;
+	struct skcipher_request *subreq;
+	struct rctx *rctx;
+
+	rctx = skcipher_request_ctx(req);
+	subreq = &rctx->subreq;
+	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+	err = do_encrypt(req, err ?: post_crypt(req));
+	if (rctx->left)
+		return;
+
+	skcipher_request_complete(req, err);
+}
+
+static int encrypt(struct skcipher_request *req)
+{
+	return do_encrypt(req, init_crypt(req, encrypt_done));
+}
+
+static int do_decrypt(struct skcipher_request *req, int err)
 {
-	struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk w;
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq;
+
+	subreq = &rctx->subreq;
+
+	while (!err && rctx->left) {
+		err = pre_crypt(req) ?:
+		      crypto_skcipher_decrypt(subreq) ?:
+		      post_crypt(req);
+
+		if (err == -EINPROGRESS ||
+		    (err == -EBUSY &&
+		     req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return err;
+	}
 
-	blkcipher_walk_init(&w, dst, src, nbytes);
-	return crypt(desc, &w, ctx,
-		     crypto_cipher_alg(ctx->child)->cia_encrypt);
+	exit_crypt(req);
+	return err;
 }
 
-static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
-		   struct scatterlist *src, unsigned int nbytes)
+static void decrypt_done(struct crypto_async_request *areq, int err)
 {
-	struct priv *ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk w;
+	struct skcipher_request *req = areq->data;
+	struct skcipher_request *subreq;
+	struct rctx *rctx;
+
+	rctx = skcipher_request_ctx(req);
+	subreq = &rctx->subreq;
+	subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
+
+	err = do_decrypt(req, err ?: post_crypt(req));
+	if (rctx->left)
+		return;
 
-	blkcipher_walk_init(&w, dst, src, nbytes);
-	return crypt(desc, &w, ctx,
-		     crypto_cipher_alg(ctx->child)->cia_decrypt);
+	skcipher_request_complete(req, err);
+}
+
+static int decrypt(struct skcipher_request *req)
+{
+	return do_decrypt(req, init_crypt(req, decrypt_done));
 }
 
 int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
@@ -293,95 +480,161 @@ int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst,
 }
 EXPORT_SYMBOL_GPL(lrw_crypt);
 
-static int init_tfm(struct crypto_tfm *tfm)
+static int init_tfm(struct crypto_skcipher *tfm)
 {
-	struct crypto_cipher *cipher;
-	struct crypto_instance *inst = (void *)tfm->__crt_alg;
-	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
-	struct priv *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst);
+	struct priv *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_skcipher *cipher;
 
-	cipher = crypto_spawn_cipher(spawn);
+	cipher = crypto_spawn_skcipher(spawn);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
-	if (crypto_cipher_blocksize(cipher) != LRW_BLOCK_SIZE) {
-		*flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN;
-		crypto_free_cipher(cipher);
-		return -EINVAL;
-	}
-
 	ctx->child = cipher;
+
+	crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(cipher) +
+					 sizeof(struct rctx));
+
 	return 0;
 }
 
-static void exit_tfm(struct crypto_tfm *tfm)
+static void exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct priv *ctx = crypto_tfm_ctx(tfm);
+	struct priv *ctx = crypto_skcipher_ctx(tfm);
 
 	lrw_free_table(&ctx->table);
-	crypto_free_cipher(ctx->child);
+	crypto_free_skcipher(ctx->child);
+}
+
+static void free(struct skcipher_instance *inst)
+{
+	crypto_drop_skcipher(skcipher_instance_ctx(inst));
+	kfree(inst);
 }
 
-static struct crypto_instance *alloc(struct rtattr **tb)
+static int create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
+	struct crypto_skcipher_spawn *spawn;
+	struct skcipher_instance *inst;
+	struct crypto_attr_type *algt;
+	struct skcipher_alg *alg;
+	const char *cipher_name;
+	char ecb_name[CRYPTO_MAX_ALG_NAME];
 	int err;
 
-	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER);
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return PTR_ERR(algt);
+
+	if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
+		return -EINVAL;
+
+	cipher_name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(cipher_name))
+		return PTR_ERR(cipher_name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	spawn = skcipher_instance_ctx(inst);
+
+	crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
+	err = crypto_grab_skcipher(spawn, cipher_name, 0,
+				   crypto_requires_sync(algt->type,
+							algt->mask));
+	if (err == -ENOENT) {
+		err = -ENAMETOOLONG;
+		if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)",
+			     cipher_name) >= CRYPTO_MAX_ALG_NAME)
+			goto err_free_inst;
+
+		err = crypto_grab_skcipher(spawn, ecb_name, 0,
+					   crypto_requires_sync(algt->type,
+								algt->mask));
+	}
+
 	if (err)
-		return ERR_PTR(err);
+		goto err_free_inst;
 
-	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
-				  CRYPTO_ALG_TYPE_MASK);
-	if (IS_ERR(alg))
-		return ERR_CAST(alg);
+	alg = crypto_skcipher_spawn_alg(spawn);
 
-	inst = crypto_alloc_instance("lrw", alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	err = -EINVAL;
+	if (alg->base.cra_blocksize != LRW_BLOCK_SIZE)
+		goto err_drop_spawn;
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
+	if (crypto_skcipher_alg_ivsize(alg))
+		goto err_drop_spawn;
 
-	if (alg->cra_alignmask < 7) inst->alg.cra_alignmask = 7;
-	else inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = &crypto_blkcipher_type;
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), "lrw",
+				  &alg->base);
+	if (err)
+		goto err_drop_spawn;
 
-	if (!(alg->cra_blocksize % 4))
-		inst->alg.cra_alignmask |= 3;
-	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
-	inst->alg.cra_blkcipher.min_keysize =
-		alg->cra_cipher.cia_min_keysize + alg->cra_blocksize;
-	inst->alg.cra_blkcipher.max_keysize =
-		alg->cra_cipher.cia_max_keysize + alg->cra_blocksize;
+	err = -EINVAL;
+	cipher_name = alg->base.cra_name;
 
-	inst->alg.cra_ctxsize = sizeof(struct priv);
+	/* Alas we screwed up the naming so we have to mangle the
+	 * cipher name.
+	 */
+	if (!strncmp(cipher_name, "ecb(", 4)) {
+		unsigned len;
 
-	inst->alg.cra_init = init_tfm;
-	inst->alg.cra_exit = exit_tfm;
+		len = strlcpy(ecb_name, cipher_name + 4, sizeof(ecb_name));
+		if (len < 2 || len >= sizeof(ecb_name))
+			goto err_drop_spawn;
 
-	inst->alg.cra_blkcipher.setkey = setkey;
-	inst->alg.cra_blkcipher.encrypt = encrypt;
-	inst->alg.cra_blkcipher.decrypt = decrypt;
+		if (ecb_name[len - 1] != ')')
+			goto err_drop_spawn;
 
-out_put_alg:
-	crypto_mod_put(alg);
-	return inst;
-}
+		ecb_name[len - 1] = 0;
 
-static void free(struct crypto_instance *inst)
-{
-	crypto_drop_spawn(crypto_instance_ctx(inst));
+		if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+			     "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME)
+			return -ENAMETOOLONG;
+	}
+
+	inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_priority = alg->base.cra_priority;
+	inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
+	inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
+				       (__alignof__(u64) - 1);
+
+	inst->alg.ivsize = LRW_BLOCK_SIZE;
+	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +
+				LRW_BLOCK_SIZE;
+	inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) +
+				LRW_BLOCK_SIZE;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct priv);
+
+	inst->alg.init = init_tfm;
+	inst->alg.exit = exit_tfm;
+
+	inst->alg.setkey = setkey;
+	inst->alg.encrypt = encrypt;
+	inst->alg.decrypt = decrypt;
+
+	inst->free = free;
+
+	err = skcipher_register_instance(tmpl, inst);
+	if (err)
+		goto err_drop_spawn;
+
+out:
+	return err;
+
+err_drop_spawn:
+	crypto_drop_skcipher(spawn);
+err_free_inst:
 	kfree(inst);
+	goto out;
 }
 
 static struct crypto_template crypto_tmpl = {
 	.name = "lrw",
-	.alloc = alloc,
-	.free = free,
+	.create = create,
 	.module = THIS_MODULE,
 };
 

^ permalink raw reply related

* [v3 PATCH 2/16] crypto: aes-ce-ccm - Use skcipher walk interface
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch makes use of the new skcipher walk interface instead of
the obsolete blkcipher walk interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 arch/arm64/crypto/aes-ce-ccm-glue.c |   50 +++++++++---------------------------
 1 file changed, 13 insertions(+), 37 deletions(-)

diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index f4bf2f2..d4f3568 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -11,9 +11,9 @@
 #include <asm/neon.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
-#include <crypto/algapi.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 
 #include "aes-ce-setkey.h"
@@ -149,12 +149,7 @@ static int ccm_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
-	struct blkcipher_desc desc = { .info = req->iv };
-	struct blkcipher_walk walk;
-	struct scatterlist srcbuf[2];
-	struct scatterlist dstbuf[2];
-	struct scatterlist *src;
-	struct scatterlist *dst;
+	struct skcipher_walk walk;
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen;
@@ -172,27 +167,19 @@ static int ccm_encrypt(struct aead_request *req)
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
-	src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen);
-	dst = src;
-	if (req->src != req->dst)
-		dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen);
-
-	blkcipher_walk_init(&walk, dst, src, len);
-	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
-					     AES_BLOCK_SIZE);
+	err = skcipher_walk_aead(&walk, req, true);
 
 	while (walk.nbytes) {
 		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-		if (walk.nbytes == len)
+		if (walk.nbytes == walk.total)
 			tail = 0;
 
 		ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   walk.nbytes - tail, ctx->key_enc,
 				   num_rounds(ctx), mac, walk.iv);
 
-		len -= walk.nbytes - tail;
-		err = blkcipher_walk_done(&desc, &walk, tail);
+		err = skcipher_walk_done(&walk, tail);
 	}
 	if (!err)
 		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
@@ -203,7 +190,7 @@ static int ccm_encrypt(struct aead_request *req)
 		return err;
 
 	/* copy authtag to end of dst */
-	scatterwalk_map_and_copy(mac, dst, req->cryptlen,
+	scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
 				 crypto_aead_authsize(aead), 1);
 
 	return 0;
@@ -214,12 +201,7 @@ static int ccm_decrypt(struct aead_request *req)
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
 	unsigned int authsize = crypto_aead_authsize(aead);
-	struct blkcipher_desc desc = { .info = req->iv };
-	struct blkcipher_walk walk;
-	struct scatterlist srcbuf[2];
-	struct scatterlist dstbuf[2];
-	struct scatterlist *src;
-	struct scatterlist *dst;
+	struct skcipher_walk walk;
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen - authsize;
@@ -237,27 +219,19 @@ static int ccm_decrypt(struct aead_request *req)
 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
-	src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen);
-	dst = src;
-	if (req->src != req->dst)
-		dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen);
-
-	blkcipher_walk_init(&walk, dst, src, len);
-	err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
-					     AES_BLOCK_SIZE);
+	err = skcipher_walk_aead(&walk, req, true);
 
 	while (walk.nbytes) {
 		u32 tail = walk.nbytes % AES_BLOCK_SIZE;
 
-		if (walk.nbytes == len)
+		if (walk.nbytes == walk.total)
 			tail = 0;
 
 		ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				   walk.nbytes - tail, ctx->key_enc,
 				   num_rounds(ctx), mac, walk.iv);
 
-		len -= walk.nbytes - tail;
-		err = blkcipher_walk_done(&desc, &walk, tail);
+		err = skcipher_walk_done(&walk, tail);
 	}
 	if (!err)
 		ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx));
@@ -268,7 +242,8 @@ static int ccm_decrypt(struct aead_request *req)
 		return err;
 
 	/* compare calculated auth tag with the stored one */
-	scatterwalk_map_and_copy(buf, src, req->cryptlen - authsize,
+	scatterwalk_map_and_copy(buf, req->src,
+				 req->assoclen + req->cryptlen - authsize,
 				 authsize, 0);
 
 	if (crypto_memneq(mac, buf, authsize))
@@ -287,6 +262,7 @@ static int ccm_decrypt(struct aead_request *req)
 		.cra_module		= THIS_MODULE,
 	},
 	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
 	.maxauthsize	= AES_BLOCK_SIZE,
 	.setkey		= ccm_setkey,
 	.setauthsize	= ccm_setauthsize,

^ permalink raw reply related

* [v3 PATCH 1/16] crypto: skcipher - Add skcipher walk interface
From: Herbert Xu @ 2016-11-22 12:08 UTC (permalink / raw)
  To: Linux Crypto Mailing List
In-Reply-To: <20161122120703.GA11911@gondor.apana.org.au>

This patch adds the skcipher walk interface which replaces both
blkcipher walk and ablkcipher walk.  Just like blkcipher walk it
can also be used for AEAD algorithms.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---

 crypto/skcipher.c                  |  511 +++++++++++++++++++++++++++++++++++++
 include/crypto/internal/skcipher.h |   47 +++
 2 files changed, 558 insertions(+)

diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index f7d0018..0f30719 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -14,9 +14,12 @@
  *
  */
 
+#include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/bug.h>
 #include <linux/cryptouser.h>
+#include <linux/list.h>
 #include <linux/module.h>
 #include <linux/rtnetlink.h>
 #include <linux/seq_file.h>
@@ -24,6 +27,514 @@
 
 #include "internal.h"
 
+enum {
+	SKCIPHER_WALK_PHYS = 1 << 0,
+	SKCIPHER_WALK_SLOW = 1 << 1,
+	SKCIPHER_WALK_COPY = 1 << 2,
+	SKCIPHER_WALK_DIFF = 1 << 3,
+	SKCIPHER_WALK_SLEEP = 1 << 4,
+};
+
+struct skcipher_walk_buffer {
+	struct list_head entry;
+	struct scatter_walk dst;
+	unsigned int len;
+	u8 *data;
+	u8 buffer[];
+};
+
+static int skcipher_walk_next(struct skcipher_walk *walk);
+
+static inline void skcipher_unmap(struct scatter_walk *walk, void *vaddr)
+{
+	if (PageHighMem(scatterwalk_page(walk)))
+		kunmap_atomic(vaddr);
+}
+
+static inline void *skcipher_map(struct scatter_walk *walk)
+{
+	struct page *page = scatterwalk_page(walk);
+
+	return (PageHighMem(page) ? kmap_atomic(page) : page_address(page)) +
+	       offset_in_page(walk->offset);
+}
+
+static inline void skcipher_map_src(struct skcipher_walk *walk)
+{
+	walk->src.virt.addr = skcipher_map(&walk->in);
+}
+
+static inline void skcipher_map_dst(struct skcipher_walk *walk)
+{
+	walk->dst.virt.addr = skcipher_map(&walk->out);
+}
+
+static inline void skcipher_unmap_src(struct skcipher_walk *walk)
+{
+	skcipher_unmap(&walk->in, walk->src.virt.addr);
+}
+
+static inline void skcipher_unmap_dst(struct skcipher_walk *walk)
+{
+	skcipher_unmap(&walk->out, walk->dst.virt.addr);
+}
+
+static inline gfp_t skcipher_walk_gfp(struct skcipher_walk *walk)
+{
+	return walk->flags & SKCIPHER_WALK_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
+}
+
+/* Get a spot of the specified length that does not straddle a page.
+ * The caller needs to ensure that there is enough space for this operation.
+ */
+static inline u8 *skcipher_get_spot(u8 *start, unsigned int len)
+{
+	u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK);
+
+	return max(start, end_page);
+}
+
+static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize)
+{
+	u8 *addr;
+
+	addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1);
+	addr = skcipher_get_spot(addr, bsize);
+	scatterwalk_copychunks(addr, &walk->out, bsize,
+			       (walk->flags & SKCIPHER_WALK_PHYS) ? 2 : 1);
+	return 0;
+}
+
+int skcipher_walk_done(struct skcipher_walk *walk, int err)
+{
+	unsigned int n = walk->nbytes - err;
+	unsigned int nbytes;
+
+	nbytes = walk->total - n;
+
+	if (unlikely(err < 0)) {
+		nbytes = 0;
+		n = 0;
+	} else if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS |
+					   SKCIPHER_WALK_SLOW |
+					   SKCIPHER_WALK_COPY |
+					   SKCIPHER_WALK_DIFF)))) {
+unmap_src:
+		skcipher_unmap_src(walk);
+	} else if (walk->flags & SKCIPHER_WALK_DIFF) {
+		skcipher_unmap_dst(walk);
+		goto unmap_src;
+	} else if (walk->flags & SKCIPHER_WALK_COPY) {
+		skcipher_map_dst(walk);
+		memcpy(walk->dst.virt.addr, walk->page, n);
+		skcipher_unmap_dst(walk);
+	} else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) {
+		if (WARN_ON(err)) {
+			err = -EINVAL;
+			nbytes = 0;
+		} else
+			n = skcipher_done_slow(walk, n);
+	}
+
+	if (err > 0)
+		err = 0;
+
+	walk->total = nbytes;
+	walk->nbytes = nbytes;
+
+	scatterwalk_advance(&walk->in, n);
+	scatterwalk_advance(&walk->out, n);
+	scatterwalk_done(&walk->in, 0, nbytes);
+	scatterwalk_done(&walk->out, 1, nbytes);
+
+	if (nbytes) {
+		crypto_yield(walk->flags & SKCIPHER_WALK_SLEEP ?
+			     CRYPTO_TFM_REQ_MAY_SLEEP : 0);
+		return skcipher_walk_next(walk);
+	}
+
+	/* Short-circuit for the common/fast path. */
+	if (!((unsigned long)walk->buffer | (unsigned long)walk->page))
+		goto out;
+
+	if (walk->flags & SKCIPHER_WALK_PHYS)
+		goto out;
+
+	if (walk->iv != walk->oiv)
+		memcpy(walk->oiv, walk->iv, walk->ivsize);
+	if (walk->buffer != walk->page)
+		kfree(walk->buffer);
+	if (walk->page)
+		free_page((unsigned long)walk->page);
+
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_done);
+
+void skcipher_walk_complete(struct skcipher_walk *walk, int err)
+{
+	struct skcipher_walk_buffer *p, *tmp;
+
+	list_for_each_entry_safe(p, tmp, &walk->buffers, entry) {
+		u8 *data;
+
+		if (err)
+			goto done;
+
+		data = p->data;
+		if (!data) {
+			data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1);
+			data = skcipher_get_spot(data, walk->chunksize);
+		}
+
+		scatterwalk_copychunks(data, &p->dst, p->len, 1);
+
+		if (offset_in_page(p->data) + p->len + walk->chunksize >
+		    PAGE_SIZE)
+			free_page((unsigned long)p->data);
+
+done:
+		list_del(&p->entry);
+		kfree(p);
+	}
+
+	if (!err && walk->iv != walk->oiv)
+		memcpy(walk->oiv, walk->iv, walk->ivsize);
+	if (walk->buffer != walk->page)
+		kfree(walk->buffer);
+	if (walk->page)
+		free_page((unsigned long)walk->page);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_complete);
+
+static void skcipher_queue_write(struct skcipher_walk *walk,
+				 struct skcipher_walk_buffer *p)
+{
+	p->dst = walk->out;
+	list_add_tail(&p->entry, &walk->buffers);
+}
+
+static int skcipher_next_slow(struct skcipher_walk *walk, unsigned int bsize)
+{
+	bool phys = walk->flags & SKCIPHER_WALK_PHYS;
+	unsigned alignmask = walk->alignmask;
+	struct skcipher_walk_buffer *p;
+	unsigned a;
+	unsigned n;
+	u8 *buffer;
+	void *v;
+
+	if (!phys) {
+		buffer = walk->buffer ?: walk->page;
+		if (buffer)
+			goto ok;
+	}
+
+	/* Start with the minimum alignment of kmalloc. */
+	a = crypto_tfm_ctx_alignment() - 1;
+	n = bsize;
+
+	if (phys) {
+		/* Calculate the minimum alignment of p->buffer. */
+		a &= (sizeof(*p) ^ (sizeof(*p) - 1)) >> 1;
+		n += sizeof(*p);
+	}
+
+	/* Minimum size to align p->buffer by alignmask. */
+	n += alignmask & ~a;
+
+	/* Minimum size to ensure p->buffer does not straddle a page. */
+	n += (bsize - 1) & ~(alignmask | a);
+
+	v = kzalloc(n, skcipher_walk_gfp(walk));
+	if (!v)
+		return skcipher_walk_done(walk, -ENOMEM);
+
+	if (phys) {
+		p = v;
+		p->len = bsize;
+		skcipher_queue_write(walk, p);
+		buffer = p->buffer;
+	} else {
+		walk->buffer = v;
+		buffer = v;
+	}
+
+ok:
+	walk->dst.virt.addr = PTR_ALIGN(buffer, alignmask + 1);
+	walk->dst.virt.addr = skcipher_get_spot(walk->dst.virt.addr, bsize);
+	walk->src.virt.addr = walk->dst.virt.addr;
+
+	scatterwalk_copychunks(walk->src.virt.addr, &walk->in, bsize, 0);
+
+	walk->nbytes = bsize;
+	walk->flags |= SKCIPHER_WALK_SLOW;
+
+	return 0;
+}
+
+static int skcipher_next_copy(struct skcipher_walk *walk)
+{
+	struct skcipher_walk_buffer *p;
+	u8 *tmp = walk->page;
+
+	skcipher_map_src(walk);
+	memcpy(tmp, walk->src.virt.addr, walk->nbytes);
+	skcipher_unmap_src(walk);
+
+	walk->src.virt.addr = tmp;
+	walk->dst.virt.addr = tmp;
+
+	if (!(walk->flags & SKCIPHER_WALK_PHYS))
+		return 0;
+
+	p = kmalloc(sizeof(*p), skcipher_walk_gfp(walk));
+	if (!p)
+		return -ENOMEM;
+
+	p->data = walk->page;
+	p->len = walk->nbytes;
+	skcipher_queue_write(walk, p);
+
+	if (offset_in_page(walk->page) + walk->nbytes + walk->chunksize >
+	    PAGE_SIZE)
+		walk->page = NULL;
+	else
+		walk->page += walk->nbytes;
+
+	return 0;
+}
+
+static int skcipher_next_fast(struct skcipher_walk *walk)
+{
+	unsigned long diff;
+
+	walk->src.phys.page = scatterwalk_page(&walk->in);
+	walk->src.phys.offset = offset_in_page(walk->in.offset);
+	walk->dst.phys.page = scatterwalk_page(&walk->out);
+	walk->dst.phys.offset = offset_in_page(walk->out.offset);
+
+	if (walk->flags & SKCIPHER_WALK_PHYS)
+		return 0;
+
+	diff = walk->src.phys.offset - walk->dst.phys.offset;
+	diff |= walk->src.virt.page - walk->dst.virt.page;
+
+	skcipher_map_src(walk);
+	walk->dst.virt.addr = walk->src.virt.addr;
+
+	if (diff) {
+		walk->flags |= SKCIPHER_WALK_DIFF;
+		skcipher_map_dst(walk);
+	}
+
+	return 0;
+}
+
+static int skcipher_walk_next(struct skcipher_walk *walk)
+{
+	unsigned int bsize;
+	unsigned int n;
+	int err;
+
+	walk->flags &= ~(SKCIPHER_WALK_SLOW | SKCIPHER_WALK_COPY |
+			 SKCIPHER_WALK_DIFF);
+
+	n = walk->total;
+	bsize = min(walk->chunksize, max(n, walk->blocksize));
+	n = scatterwalk_clamp(&walk->in, n);
+	n = scatterwalk_clamp(&walk->out, n);
+
+	if (unlikely(n < bsize)) {
+		if (unlikely(walk->total < walk->blocksize))
+			return skcipher_walk_done(walk, -EINVAL);
+
+slow_path:
+		err = skcipher_next_slow(walk, bsize);
+		goto set_phys_lowmem;
+	}
+
+	if (unlikely((walk->in.offset | walk->out.offset) & walk->alignmask)) {
+		if (!walk->page) {
+			gfp_t gfp = skcipher_walk_gfp(walk);
+
+			walk->page = (void *)__get_free_page(gfp);
+			if (!walk->page)
+				goto slow_path;
+		}
+
+		walk->nbytes = min_t(unsigned, n,
+				     PAGE_SIZE - offset_in_page(walk->page));
+		walk->flags |= SKCIPHER_WALK_COPY;
+		err = skcipher_next_copy(walk);
+		goto set_phys_lowmem;
+	}
+
+	walk->nbytes = n;
+
+	return skcipher_next_fast(walk);
+
+set_phys_lowmem:
+	if (!err && (walk->flags & SKCIPHER_WALK_PHYS)) {
+		walk->src.phys.page = virt_to_page(walk->src.virt.addr);
+		walk->dst.phys.page = virt_to_page(walk->dst.virt.addr);
+		walk->src.phys.offset &= PAGE_SIZE - 1;
+		walk->dst.phys.offset &= PAGE_SIZE - 1;
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_next);
+
+static int skcipher_copy_iv(struct skcipher_walk *walk)
+{
+	unsigned a = crypto_tfm_ctx_alignment() - 1;
+	unsigned alignmask = walk->alignmask;
+	unsigned ivsize = walk->ivsize;
+	unsigned bs = walk->chunksize;
+	unsigned aligned_bs;
+	unsigned size;
+	u8 *iv;
+
+	aligned_bs = ALIGN(bs, alignmask);
+
+	/* Minimum size to align buffer by alignmask. */
+	size = alignmask & ~a;
+
+	if (walk->flags & SKCIPHER_WALK_PHYS)
+		size += ivsize;
+	else {
+		size += aligned_bs + ivsize;
+
+		/* Minimum size to ensure buffer does not straddle a page. */
+		size += (bs - 1) & ~(alignmask | a);
+	}
+
+	walk->buffer = kmalloc(size, skcipher_walk_gfp(walk));
+	if (!walk->buffer)
+		return -ENOMEM;
+
+	iv = PTR_ALIGN(walk->buffer, alignmask + 1);
+	iv = skcipher_get_spot(iv, bs) + aligned_bs;
+
+	walk->iv = memcpy(iv, walk->iv, walk->ivsize);
+	return 0;
+}
+
+static int skcipher_walk_first(struct skcipher_walk *walk)
+{
+	walk->nbytes = 0;
+
+	if (WARN_ON_ONCE(in_irq()))
+		return -EDEADLK;
+
+	if (unlikely(!walk->total))
+		return 0;
+
+	walk->buffer = NULL;
+	if (unlikely(((unsigned long)walk->iv & walk->alignmask))) {
+		int err = skcipher_copy_iv(walk);
+		if (err)
+			return err;
+	}
+
+	walk->page = NULL;
+	walk->nbytes = walk->total;
+
+	return skcipher_walk_next(walk);
+}
+
+static int skcipher_walk_skcipher(struct skcipher_walk *walk,
+				  struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+
+	scatterwalk_start(&walk->in, req->src);
+	scatterwalk_start(&walk->out, req->dst);
+
+	walk->total = req->cryptlen;
+	walk->iv = req->iv;
+	walk->oiv = req->iv;
+
+	walk->flags &= ~SKCIPHER_WALK_SLEEP;
+	walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+		       SKCIPHER_WALK_SLEEP : 0;
+
+	walk->blocksize = crypto_skcipher_blocksize(tfm);
+	walk->chunksize = crypto_skcipher_chunksize(tfm);
+	walk->ivsize = crypto_skcipher_ivsize(tfm);
+	walk->alignmask = crypto_skcipher_alignmask(tfm);
+
+	return skcipher_walk_first(walk);
+}
+
+int skcipher_walk_virt(struct skcipher_walk *walk,
+		       struct skcipher_request *req, bool atomic)
+{
+	int err;
+
+	walk->flags &= ~SKCIPHER_WALK_PHYS;
+
+	err = skcipher_walk_skcipher(walk, req);
+
+	walk->flags &= atomic ? ~SKCIPHER_WALK_SLEEP : ~0;
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_virt);
+
+void skcipher_walk_atomise(struct skcipher_walk *walk)
+{
+	walk->flags &= ~SKCIPHER_WALK_SLEEP;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_atomise);
+
+int skcipher_walk_async(struct skcipher_walk *walk,
+			struct skcipher_request *req)
+{
+	walk->flags |= SKCIPHER_WALK_PHYS;
+
+	INIT_LIST_HEAD(&walk->buffers);
+
+	return skcipher_walk_skcipher(walk, req);
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_async);
+
+int skcipher_walk_aead(struct skcipher_walk *walk, struct aead_request *req,
+		       bool atomic)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	int err;
+
+	scatterwalk_start(&walk->in, req->src);
+	scatterwalk_start(&walk->out, req->dst);
+
+	scatterwalk_copychunks(NULL, &walk->in, req->assoclen, 2);
+	scatterwalk_copychunks(NULL, &walk->out, req->assoclen, 2);
+
+	walk->total = req->cryptlen;
+	walk->iv = req->iv;
+	walk->oiv = req->iv;
+
+	if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
+		walk->flags |= SKCIPHER_WALK_SLEEP;
+	else
+		walk->flags &= ~SKCIPHER_WALK_SLEEP;
+
+	walk->blocksize = crypto_aead_blocksize(tfm);
+	walk->chunksize = crypto_aead_chunksize(tfm);
+	walk->ivsize = crypto_aead_ivsize(tfm);
+	walk->alignmask = crypto_aead_alignmask(tfm);
+
+	err = skcipher_walk_first(walk);
+
+	if (atomic)
+		walk->flags &= ~SKCIPHER_WALK_SLEEP;
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(skcipher_walk_aead);
+
 static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg)
 {
 	if (alg->cra_type == &crypto_blkcipher_type)
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index 7a7e815..d55041f 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -15,8 +15,10 @@
 
 #include <crypto/algapi.h>
 #include <crypto/skcipher.h>
+#include <linux/list.h>
 #include <linux/types.h>
 
+struct aead_request;
 struct rtattr;
 
 struct skcipher_instance {
@@ -34,6 +36,40 @@ struct crypto_skcipher_spawn {
 	struct crypto_spawn base;
 };
 
+struct skcipher_walk {
+	union {
+		struct {
+			struct page *page;
+			unsigned long offset;
+		} phys;
+
+		struct {
+			u8 *page;
+			void *addr;
+		} virt;
+	} src, dst;
+
+	struct scatter_walk in;
+	unsigned int nbytes;
+
+	struct scatter_walk out;
+	unsigned int total;
+
+	struct list_head buffers;
+
+	u8 *page;
+	u8 *buffer;
+	u8 *oiv;
+	void *iv;
+
+	unsigned int ivsize;
+
+	int flags;
+	unsigned int blocksize;
+	unsigned int chunksize;
+	unsigned int alignmask;
+};
+
 extern const struct crypto_type crypto_givcipher_type;
 
 static inline struct crypto_instance *skcipher_crypto_instance(
@@ -104,6 +140,17 @@ static inline void crypto_skcipher_set_reqsize(
 int skcipher_register_instance(struct crypto_template *tmpl,
 			       struct skcipher_instance *inst);
 
+int skcipher_walk_done(struct skcipher_walk *walk, int err);
+int skcipher_walk_virt(struct skcipher_walk *walk,
+		       struct skcipher_request *req,
+		       bool atomic);
+void skcipher_walk_atomise(struct skcipher_walk *walk);
+int skcipher_walk_async(struct skcipher_walk *walk,
+			struct skcipher_request *req);
+int skcipher_walk_aead(struct skcipher_walk *walk, struct aead_request *req,
+		       bool atomic);
+void skcipher_walk_complete(struct skcipher_walk *walk, int err);
+
 static inline void ablkcipher_request_complete(struct ablkcipher_request *req,
 					       int err)
 {

^ permalink raw reply related

* [v3 PATCH 0/16] crypto: skcipher - skcipher algorithm conversion part 3
From: Herbert Xu @ 2016-11-22 12:07 UTC (permalink / raw)
  To: Linux Crypto Mailing List

Hi:

v3 fixes a number of bugs.

This patch series is the third instalment of the skcipher conversion.

It introduces the skcipher walk interface, and converts a number of
core algorithms such as CBC and LRW/XTS, as well as the aesni on x86
and various ARM aes implementations.

It also adds an skcipher version of cryptd, as well as making
testmgr skip all internal algorithms when testing.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCH v3] arm64/crypto: Accelerated CRC T10 DIF computation
From: YueHaibing @ 2016-11-22 10:14 UTC (permalink / raw)
  To: herbert, davem, catalin.marinas, will.deacon
  Cc: linux-crypto, linux-arm-kernel, linux-kernel, dingtianhong,
	hanjun.guo, yangshengkai, YueHaibing

This is the ARM64 CRC T10 DIF transform accelerated with the ARMv8
NEON instruction.The config CRYPTO_CRCT10DIF_NEON should be turned
on to enable the feature.The crc_t10dif crypto library function will
use this faster algorithm when crct10dif_neon module is loaded.

Tcrypt benchmark results:

HIP06  (mode=320 sec=2)

The ratio of bytes/sec crct10dif-neon Vs. crct10dif-generic:

                        TEST                              neon          generic         ratio
  16 byte blocks,   16 bytes per update,   1 updates    214506112       171095400       1.25
  64 byte blocks,   16 bytes per update,   4 updates    139385312       119036352       1.17
  64 byte blocks,   64 bytes per update,   1 updates    671523712       198945344       3.38
 256 byte blocks,   16 bytes per update,  16 updates    157674880       125146752       1.26
 256 byte blocks,   64 bytes per update,   4 updates    491888128       175764096       2.80
 256 byte blocks,  256 bytes per update,   1 updates    2123298176      206995200       10.26
1024 byte blocks,   16 bytes per update,  64 updates    161243136       126460416       1.28
1024 byte blocks,  256 bytes per update,   4 updates    1643020800      200027136       8.21
1024 byte blocks, 1024 bytes per update,   1 updates    4238239232      209106432       20.27
2048 byte blocks,   16 bytes per update, 128 updates    162079744       126953472       1.28
2048 byte blocks,  256 bytes per update,   8 updates    1693587456      200867840       8.43
2048 byte blocks, 1024 bytes per update,   2 updates    3424323584      206330880       16.60
2048 byte blocks, 2048 bytes per update,   1 updates    5228207104      208620544       25.06
4096 byte blocks,   16 bytes per update, 256 updates    162304000       126894080       1.28
4096 byte blocks,  256 bytes per update,  16 updates    1731862528      201197568       8.61
4096 byte blocks, 1024 bytes per update,   4 updates    3668625408      207003648       17.72
4096 byte blocks, 4096 bytes per update,   1 updates    5551239168      209127424       26.54
8192 byte blocks,   16 bytes per update, 512 updates    162779136       126984192       1.28
8192 byte blocks,  256 bytes per update,  32 updates    1753702400      201420800       8.71
8192 byte blocks, 1024 bytes per update,   8 updates    3760918528      207351808       18.14
8192 byte blocks, 4096 bytes per update,   2 updates    5483655168      208928768       26.25
8192 byte blocks, 8192 bytes per update,   1 updates    5623377920      209108992       26.89

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: YangShengkai <yangshengkai@huawei.com>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>

---
 arch/arm64/crypto/Kconfig                 |   5 +
 arch/arm64/crypto/Makefile                |   4 +
 arch/arm64/crypto/crct10dif-neon-asm_64.S | 751 ++++++++++++++++++++++++++++++
 arch/arm64/crypto/crct10dif-neon_glue.c   | 115 +++++
 4 files changed, 875 insertions(+)
 create mode 100644 arch/arm64/crypto/crct10dif-neon-asm_64.S
 create mode 100644 arch/arm64/crypto/crct10dif-neon_glue.c

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 2cf32e9..2e450bf 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -23,6 +23,11 @@ config CRYPTO_GHASH_ARM64_CE
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_HASH
 
+config CRYPTO_CRCT10DIF_NEON
+	tristate "CRCT10DIF hardware acceleration using NEON instructions"
+	depends on ARM64 && KERNEL_MODE_NEON
+	select CRYPTO_HASH
+
 config CRYPTO_AES_ARM64_CE
 	tristate "AES core cipher using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index abb79b3..6c9ff2c 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -29,6 +29,10 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o
 obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
 aes-neon-blk-y := aes-glue-neon.o aes-neon.o
 
+obj-$(CONFIG_CRYPTO_CRCT10DIF_NEON) += crct10dif-neon.o
+crct10dif-neon-y := crct10dif-neon-asm_64.o crct10dif-neon_glue.o
+AFLAGS_crct10dif-neon-asm_64.o	:= -march=armv8-a+crypto
+
 AFLAGS_aes-ce.o		:= -DINTERLEAVE=4
 AFLAGS_aes-neon.o	:= -DINTERLEAVE=4
 
diff --git a/arch/arm64/crypto/crct10dif-neon-asm_64.S b/arch/arm64/crypto/crct10dif-neon-asm_64.S
new file mode 100644
index 0000000..2ae3033
--- /dev/null
+++ b/arch/arm64/crypto/crct10dif-neon-asm_64.S
@@ -0,0 +1,751 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+.global crc_t10dif_neon
+.text
+
+/* X0 is initial CRC value
+ * X1 is data buffer
+ * X2 is the length of buffer
+ * X3 is the backup buffer(for extend)
+ * X4 for other extend parameter(for extend)
+ * Q0, Q1, Q2, Q3 maybe as parameter for other functions,
+ * the value of Q0, Q1, Q2, Q3 maybe modified.
+ *
+ * suggestion:
+ * 1. dont use general purpose register for calculation
+ * 2. set data endianness outside of the kernel
+ * 3. use ext as shifting around
+ * 4. dont use LD3/LD4, ST3/ST4
+ */
+
+crc_t10dif_neon:
+	/* push the register to stack that CRC16 will use */
+	STP		X5, X6, [sp, #-0x10]!
+	STP		X7, X8, [sp, #-0x10]!
+	STP		X9, X10, [sp, #-0x10]!
+	STP		X11, X12, [sp, #-0x10]!
+	STP		X13, X14, [sp, #-0x10]!
+	STP		Q10, Q11, [sp, #-0x20]!
+	STP		Q12, Q13, [sp, #-0x20]!
+	STP		Q4, Q5, [sp, #-0x20]!
+	STP		Q6, Q7, [sp, #-0x20]!
+	STP		Q8, Q9, [sp, #-0x20]!
+	STP		Q14, Q15, [sp, #-0x20]!
+	STP		Q16, Q17, [sp, #-0x20]!
+	STP		Q18, Q19, [sp, #-0x20]!
+
+	SUB		sp,sp,#0x20
+
+	MOV		X11, #0		// PUSH STACK FLAG
+
+	CMP		X2, #0x80
+	B.LT		2f		// _less_than_128, <128
+
+	/* V10/V11/V12/V13	is 128bit.
+	 * we get data 512bit( by cacheline ) each time
+	 */
+	LDP		Q10, Q11, [X1], #0x20
+	LDP		Q12, Q13, [X1], #0x20
+
+	/* move the initial value to V6 register */
+	LSL		X0, X0, #48
+	EOR		V6.16B, V6.16B, V6.16B
+	MOV		V6.D[1], X0
+
+	/* big-little end change. because the data in memory is little-end,
+	 * we deal the data for bigend
+	 */
+
+	REV64		V10.16B, V10.16B
+	REV64		V11.16B, V11.16B
+	REV64		V12.16B, V12.16B
+	REV64		V13.16B, V13.16B
+	EXT     	V10.16B, V10.16B, V10.16B, #8
+	EXT     	V11.16B, V11.16B, V11.16B, #8
+	EXT     	V12.16B, V12.16B, V12.16B, #8
+	EXT     	V13.16B, V13.16B, V13.16B, #8
+
+	EOR		V10.16B, V10.16B, V6.16B
+
+	SUB		X2, X2, #0x80
+	ADD     	X5, X1, #0x20
+
+	/* deal data when the size of buffer bigger than 128 bytes */
+	/* _fold_64_B_loop */
+	LDR		Q6,=0xe658000000000000044c000000000000
+1:
+
+	LDP		Q16, Q17, [X1] ,#0x40
+	LDP		Q18, Q19, [X5], #0x40
+
+	/* carry-less multiply.
+	 * V10 high-64bits carry-less multiply
+	 * V6 high-64bits(PMULL2)
+	 * V11 low-64bits carry-less multiply V6 low-64bits(PMULL)
+	 */
+
+	PMULL2		V4.1Q, V10.2D, V6.2D
+	PMULL		V10.1Q, V10.1D, V6.1D
+	PMULL2		V5.1Q, V11.2D, V6.2D
+	PMULL		V11.1Q, V11.1D, V6.1D
+
+	REV64		V16.16B, V16.16B
+	REV64		V17.16B, V17.16B
+	REV64		V18.16B, V18.16B
+	REV64		V19.16B, V19.16B
+
+	PMULL2		V14.1Q, V12.2D, V6.2D
+	PMULL		V12.1Q, V12.1D, V6.1D
+	PMULL2		V15.1Q, V13.2D, V6.2D
+	PMULL		V13.1Q, V13.1D, V6.1D
+
+	EXT     	V16.16B, V16.16B, V16.16B, #8
+	EOR		V10.16B, V10.16B, V4.16B
+
+	EXT     	V17.16B, V17.16B, V17.16B, #8
+	EOR		V11.16B, V11.16B, V5.16B
+
+	EXT     	V18.16B, V18.16B, V18.16B, #8
+	EOR		V12.16B, V12.16B, V14.16B
+
+	EXT     	V19.16B, V19.16B, V19.16B, #8
+	EOR		V13.16B, V13.16B, V15.16B
+
+	SUB		X2, X2, #0x40
+
+
+	EOR		V10.16B, V10.16B, V16.16B
+	EOR		V11.16B, V11.16B, V17.16B
+
+	EOR		V12.16B, V12.16B, V18.16B
+	EOR		V13.16B, V13.16B, V19.16B
+
+	CMP		X2, #0x0
+	B.GE		1b			// >=0
+
+	LDR		Q6, =0x06df0000000000002d56000000000000
+	MOV		V4.16B, V10.16B
+	/* V10 carry-less 0x06df000000000000([127:64]*[127:64]) */
+	PMULL		V4.1Q, V4.1D, V6.1D	//switch PMULL & PMULL2 order
+	PMULL2		V10.1Q, V10.2D, V6.2D
+	EOR		V11.16B, V11.16B, V4.16B
+	EOR		V11.16B, V11.16B, V10.16B
+
+	MOV		V4.16B, V11.16B
+	PMULL		V4.1Q, V4.1D, V6.1D	//switch PMULL & PMULL2 order
+	PMULL2		V11.1Q, V11.2D, V6.2D
+	EOR		V12.16B, V12.16B, V4.16B
+	EOR		V12.16B, V12.16B, V11.16B
+
+	MOV		V4.16B, V12.16B
+	PMULL		V4.1Q, V4.1D, V6.1D	//switch PMULL & PMULL2 order
+	PMULL2		V12.1Q, V12.2D, V6.2D
+	EOR		V13.16B, V13.16B, V4.16B
+	EOR		V13.16B, V13.16B, V12.16B
+
+	ADD		X2, X2, #48
+	CMP		X2, #0x0
+	B.LT		3f			// _final_reduction_for_128, <0
+
+	/* _16B_reduction_loop */
+4:
+	/* unrelated load as early as possible*/
+	LDR		Q10, [X1], #0x10
+
+	MOV		V4.16B, V13.16B
+	PMULL2		V13.1Q, V13.2D, V6.2D
+	PMULL		V4.1Q, V4.1D, V6.1D
+	EOR		V13.16B, V13.16B, V4.16B
+
+	REV64		V10.16B, V10.16B
+	EXT     	V10.16B, V10.16B, V10.16B, #8
+
+	EOR		V13.16B, V13.16B, V10.16B
+
+	SUB		X2, X2, #0x10
+	CMP		X2, #0x0
+	B.GE		4b			// _16B_reduction_loop, >=0
+
+	/*  _final_reduction_for_128 */
+3:	ADD		X2, X2, #0x10
+	CMP		X2, #0x0
+	B.EQ		5f			// _128_done, ==0
+
+	/* _get_last_two_xmms */
+6:	MOV		V12.16B, V13.16B
+	SUB		X1, X1, #0x10
+	ADD		X1, X1, X2
+	LDR		Q11, [X1], #0x10
+	REV64		V11.16B, V11.16B
+	EXT		V11.16B, V11.16B, V11.16B, #8
+
+	CMP		X2, #8
+	B.EQ		50f
+	B.LT		51f
+	B.GT		52f
+
+50:
+	/* dont use X register as temp one */
+	FMOV		D14, D12
+	MOVI		D12, #0
+	MOV		V12.D[1],V14.D[0]
+	B		53f
+51:
+	MOV		X9, #64
+	LSL     	X13, X2, #3		// <<3 equal x8
+	SUB		X9, X9, X13
+	MOV		X5, V12.D[0]		// low 64-bit
+	MOV		X6, V12.D[1]		// high 64-bit
+	LSR		X10, X5, X9		// high bit of low 64-bit
+	LSL		X7, X5, X13
+	LSL		X8, X6, X13
+	ORR		X8, X8, X10		// combination of high 64-bit
+	MOV		V12.D[1], X8
+	MOV		V12.D[0], X7
+
+	B		53f
+52:
+	LSL		X13, X2, #3		// <<3 equal x8
+	SUB		X13, X13, #64
+
+	DUP		V18.2D, X13
+	FMOV		D16, D12
+	USHL		D16, D16, D18
+	EXT		V12.16B, V16.16B, V16.16B, #8
+
+53:
+	MOVI		D14, #0			//add one zero constant
+
+	CMP		X2, #0
+	B.EQ	30f
+	CMP		X2, #1
+	B.EQ	31f
+	CMP		X2, #2
+	B.EQ	32f
+	CMP		X2, #3
+	B.EQ	33f
+	CMP		X2, #4
+	B.EQ	34f
+	CMP		X2, #5
+	B.EQ	35f
+	CMP		X2, #6
+	B.EQ	36f
+	CMP		X2, #7
+	B.EQ	37f
+	CMP		X2, #8
+	B.EQ	38f
+	CMP		X2, #9
+	B.EQ	39f
+	CMP		X2, #10
+	B.EQ	40f
+	CMP		X2, #11
+	B.EQ	41f
+	CMP		X2, #12
+	B.EQ	42f
+	CMP		X2, #13
+	B.EQ	43f
+	CMP		X2, #14
+	B.EQ	44f
+	CMP		X2, #15
+	B.EQ	45f
+
+	// >> 128bit
+30:
+	EOR		V13.16B, V13.16B, V13.16B
+	EOR		V8.16B, V8.16B, V8.16B
+	LDR		Q9,=0xffffffffffffffffffffffffffffffff
+	B		46f
+
+	// >> 120bit
+31:
+	USHR		V13.2D, V13.2D, #56
+	EXT		V13.16B, V13.16B, V14.16B, #8
+	LDR		Q8,=0xff
+	LDR		Q9,=0xffffffffffffffffffffffffffffff00
+	B		46f
+
+	// >> 112bit
+32:
+	USHR		 V13.2D, V13.2D, #48
+	EXT		V13.16B, V13.16B, V14.16B, #8
+	LDR		Q8,=0xffff
+	LDR		Q9,=0xffffffffffffffffffffffffffff0000
+	B		46f
+
+	// >> 104bit
+33:
+	USHR		V13.2D, V13.2D, #40
+	EXT		V13.16B, V13.16B, V14.16B, #8
+	LDR		Q8,=0xffffff
+	LDR		Q9,=0xffffffffffffffffffffffffff000000
+	B		46f
+
+	// >> 96bit
+34:
+	USHR		V13.2D, V13.2D, #32
+	EXT		V13.16B, V13.16B, V14.16B, #8
+	LDR		Q8,=0xffffffff
+	LDR		Q9,=0xffffffffffffffffffffffff00000000
+	B		46f
+
+	// >> 88bit
+35:
+	USHR		V13.2D, V13.2D, #24
+	EXT		V13.16B, V13.16B, V14.16B, #8
+	LDR		Q8,=0xffffffffff
+	LDR		Q9,=0xffffffffffffffffffffff0000000000
+	B		46f
+
+	// >> 80bit
+36:
+	USHR		V13.2D, V13.2D, #16
+	EXT		V13.16B, V13.16B, V14.16B, #8
+	LDR		Q8,=0xffffffffffff
+	LDR		Q9,=0xffffffffffffffffffff000000000000
+	B		46f
+
+	// >> 72bit
+37:
+	USHR		V13.2D, V13.2D, #8
+	EXT		V13.16B, V13.16B, V14.16B, #8
+	LDR		Q8,=0xffffffffffffff
+	LDR		Q9,=0xffffffffffffffffff00000000000000
+	B		46f
+
+	// >> 64bit
+38:
+	EXT		 V13.16B, V13.16B, V14.16B, #8
+	LDR		Q8,=0xffffffffffffffff
+	LDR		Q9,=0xffffffffffffffff0000000000000000
+	B		46f
+
+	// >> 56bit
+39:
+	EXT		V13.16B, V13.16B, V13.16B, #7
+	MOV		V13.S[3], V14.S[0]
+	MOV		V13.H[5], V14.H[0]
+	MOV		V13.B[9], V14.B[0]
+
+	LDR		Q8,=0xffffffffffffffffff
+	LDR		Q9,=0xffffffffffffff000000000000000000
+	B		46f
+
+	// >> 48bit
+40:
+	EXT		V13.16B, V13.16B, V13.16B, #6
+	MOV		V13.S[3], V14.S[0]
+	MOV		V13.H[5], V14.H[0]
+
+	LDR		Q8,=0xffffffffffffffffffff
+	LDR		Q9,=0xffffffffffff00000000000000000000
+	B		46f
+
+	// >> 40bit
+41:
+	EXT		V13.16B, V13.16B, V13.16B, #5
+	MOV		V13.S[3], V14.S[0]
+	MOV		V13.B[11], V14.B[0]
+
+	LDR		Q8,=0xffffffffffffffffffffff
+	LDR		Q9,=0xffffffffff0000000000000000000000
+	B		46f
+
+	// >> 32bit
+42:
+	EXT		V13.16B, V13.16B, V13.16B, #4
+	MOV		V13.S[3], V14.S[0]
+
+	LDR		Q8,=0xffffffffffffffffffffffff
+	LDR		Q9,=0xffffffff000000000000000000000000
+	B		46f
+
+	// >> 24bit
+43:
+	EXT		V13.16B, V13.16B, V13.16B, #3
+	MOV		V13.H[7], V14.H[0]
+	MOV		V13.B[13], V14.B[0]
+
+	LDR		Q8,=0xffffffffffffffffffffffffff
+	LDR		Q9,=0xffffff00000000000000000000000000
+	B		46f
+
+	// >> 16bit
+44:
+	EXT		V13.16B, V13.16B, V13.16B, #2
+	MOV		V13.H[7], V14.H[0]
+
+	LDR		Q8,=0xffffffffffffffffffffffffffff
+	LDR		Q9,=0xffff0000000000000000000000000000
+	B		46f
+
+	// >> 8bit
+45:
+	EXT		 V13.16B, V13.16B, V13.16B, #1
+	MOV		V13.B[15], V14.B[0]
+
+	LDR		Q8,=0xffffffffffffffffffffffffffffff
+	LDR		Q9,=0xff000000000000000000000000000000
+
+	// backup V12 first
+	// pblendvb	xmm1, xmm2
+46:
+	AND		V12.16B, V12.16B, V9.16B
+	AND		V11.16B, V11.16B, V8.16B
+	ORR		V11.16B, V11.16B, V12.16B
+
+	MOV		V12.16B, V11.16B
+	MOV		V4.16B, V13.16B
+	PMULL2		V13.1Q, V13.2D, V6.2D
+	PMULL		V4.1Q, V4.1D, V6.1D
+	EOR		V13.16B, V13.16B, V4.16B
+	EOR		V13.16B, V13.16B, V12.16B
+
+	/* _128_done. we change the Q6 D[0] and D[1] */
+5:	LDR		Q6, =0x2d560000000000001368000000000000
+	MOVI		D14, #0
+	MOV		V10.16B, V13.16B
+	PMULL2		V13.1Q, V13.2D, V6.2D
+
+	MOV		V10.D[1], V10.D[0]
+	MOV		V10.D[0], V14.D[0]    //set zero
+
+	EOR		V13.16B, V13.16B, V10.16B
+
+	MOV		V10.16B, V13.16B
+	LDR		Q7, =0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
+	AND		V10.16B, V10.16B, V7.16B
+
+	MOV		S13, V13.S[3]
+
+	PMULL		V13.1Q, V13.1D, V6.1D
+	EOR		V13.16B, V13.16B, V10.16B
+
+	/* _barrett */
+7:	LDR		Q6, =0x00000001f65a57f8000000018bb70000
+	MOVI    	D14, #0
+	MOV		V10.16B, V13.16B
+	PMULL2		V13.1Q, V13.2D, V6.2D
+
+	EXT		V13.16B, V13.16B, V13.16B, #12
+	MOV		V13.S[0], V14.S[0]
+
+	EXT		V6.16B, V6.16B, V6.16B, #8
+	PMULL2		V13.1Q, V13.2D, V6.2D
+
+	EXT		V13.16B, V13.16B, V13.16B, #12
+	MOV		V13.S[0], V14.S[0]
+
+	EOR		V13.16B, V13.16B, V10.16B
+	MOV		X0, V13.D[0]
+
+	/* _cleanup */
+8:	MOV		X14, #48
+	LSR		X0, X0, X14
+99:
+	ADD		sp, sp, #0x20
+
+	LDP		Q18, Q19, [sp], #0x20
+	LDP		Q16, Q17, [sp], #0x20
+	LDP		Q14, Q15, [sp], #0x20
+
+	LDP		Q8, Q9, [sp], #0x20
+	LDP		Q6, Q7, [sp], #0x20
+	LDP		Q4, Q5, [sp], #0x20
+	LDP		Q12, Q13, [sp], #0x20
+	LDP		Q10, Q11, [sp], #0x20
+	LDP		X13, X14, [sp], #0x10
+	LDP		X11, X12, [sp], #0x10
+	LDP		X9, X10, [sp], #0x10
+	LDP		X7, X8, [sp], #0x10
+	LDP		X5, X6, [sp], #0x10
+
+	RET
+
+	/* _less_than_128 */
+2:	CMP		X2, #32
+	B.LT		9f				// _less_than_32
+	LDR		Q6, =0x06df0000000000002d56000000000000
+
+	LSL		X0, X0, #48
+	LDR		Q10, =0x0
+	MOV		V10.D[1], X0
+	LDR		Q13, [X1], #0x10
+	REV64		V13.16B, V13.16B
+	EXT     	V13.16B, V13.16B, V13.16B, #8
+
+	EOR		V13.16B, V13.16B, V10.16B
+
+	SUB		X2, X2, #32
+	B		4b
+
+	/* _less_than_32 */
+9:	CMP		X2, #0
+	B.EQ		99b			// _cleanup
+	LSL		X0, X0, #48
+	LDR		Q10,=0x0
+	MOV		V10.D[1], X0
+
+	CMP		X2, #16
+	B.EQ		10f			// _exact_16_left
+	B.LE		11f			// _less_than_16_left
+	LDR		Q13, [X1], #0x10
+
+	REV64		V13.16B, V13.16B
+	EXT		V13.16B, V13.16B, V13.16B, #8
+
+	EOR		V13.16B, V13.16B, V10.16B
+	SUB		X2, X2, #16
+	LDR		Q6, =0x06df0000000000002d56000000000000
+	B		6b			// _get_last_two_xmms
+
+	/*  _less_than_16_left */
+11:	CMP		X2, #4
+	B.LT		13f			// _only_less_than_4
+
+	/* backup the length of data, we used in _less_than_2_left */
+	MOV		X8, X2
+	CMP		X2, #8
+	B.LT		14f			// _less_than_8_left
+
+	LDR		X14, [X1], #8
+	/* push the data to stack, we backup the data to V10 */
+	STR		X14, [sp, #0]
+	SUB		X2, X2, #8
+	ADD		X11, X11, #8
+
+	/* _less_than_8_left */
+14:	CMP		X2, #4
+	B.LT		15f			// _less_than_4_left
+
+	/* get 32bit data */
+	LDR		W5, [X1], #4
+
+	/* push the data to stack */
+	STR		W5, [sp, X11]
+	SUB		X2, X2, #4
+	ADD		X11, X11, #4
+
+	/* _less_than_4_left */
+15:	CMP		X2, #2
+	B.LT		16f			// _less_than_2_left
+
+	/* get 16bits data */
+	LDRH		W6, [X1], #2
+
+	/* push the data to stack */
+	STRH		W6, [sp, X11]
+	SUB		X2, X2, #2
+	ADD		X11, X11, #2
+
+	/* _less_than_2_left */
+16:
+	/* get 8bits data */
+	LDRB		W7, [X1], #1
+	STRB		W7, [sp, X11]
+	ADD		X11, X11, #1
+
+	/* POP data from stack, store to V13 */
+	LDR		Q13, [sp]
+	MOVI    	D14, #0
+	REV64		V13.16B, V13.16B
+	MOV		V8.16B, V13.16B
+	MOV		V13.D[1], V8.D[0]
+	MOV		V13.D[0], V8.D[1]
+
+	EOR		V13.16B, V13.16B, V10.16B
+	CMP		X8, #15
+	B.EQ	80f
+	CMP		X8, #14
+	B.EQ	81f
+	CMP		X8, #13
+	B.EQ	82f
+	CMP		X8, #12
+	B.EQ	83f
+	CMP		X8, #11
+	B.EQ	84f
+	CMP		X8, #10
+	B.EQ	85f
+	CMP		X8, #9
+	B.EQ	86f
+	CMP		X8, #8
+	B.EQ	87f
+	CMP		X8, #7
+	B.EQ	88f
+	CMP		X8, #6
+	B.EQ	89f
+	CMP		X8, #5
+	B.EQ	90f
+	CMP		X8, #4
+	B.EQ	91f
+	CMP		X8, #3
+	B.EQ	92f
+	CMP		X8, #2
+	B.EQ	93f
+	CMP		X8, #1
+	B.EQ	94f
+	CMP		X8, #0
+	B.EQ	95f
+
+80:
+	EXT		V13.16B, V13.16B, V13.16B, #1
+	MOV		V13.B[15], V14.B[0]
+	B		5b
+
+81:
+	EXT		V13.16B, V13.16B, V13.16B, #2
+	MOV		 V13.H[7], V14.H[0]
+	B		5b
+
+82:
+	EXT		V13.16B, V13.16B, V13.16B, #3
+	MOV		V13.H[7], V14.H[0]
+	MOV		V13.B[13], V14.B[0]
+	B		5b
+83:
+
+	EXT		V13.16B, V13.16B, V13.16B, #4
+	MOV		V13.S[3], V14.S[0]
+	B		5b
+
+84:
+	EXT		V13.16B, V13.16B, V13.16B, #5
+	MOV		V13.S[3], V14.S[0]
+	MOV		V13.B[11], V14.B[0]
+	B		5b
+
+85:
+	EXT		V13.16B, V13.16B, V13.16B, #6
+	MOV		V13.S[3], V14.S[0]
+	MOV		V13.H[5], V14.H[0]
+	B		5b
+
+86:
+	EXT		V13.16B, V13.16B, V13.16B, #7
+	MOV		V13.S[3], V14.S[0]
+	MOV		V13.H[5], V14.H[0]
+	MOV		V13.B[9], V14.B[0]
+	B		5b
+
+87:
+	MOV		V13.D[0], V13.D[1]
+	MOV		V13.D[1], V14.D[0]
+	B		5b
+
+88:
+	EXT		V13.16B, V13.16B, V13.16B, #9
+	MOV		V13.D[1], V14.D[0]
+	MOV		V13.B[7], V14.B[0]
+	B		5b
+
+89:
+	EXT		V13.16B, V13.16B, V13.16B, #10
+	MOV		V13.D[1], V14.D[0]
+	MOV		 V13.H[3], V14.H[0]
+	B		5b
+
+90:
+	EXT		V13.16B, V13.16B, V13.16B, #11
+	MOV		V13.D[1], V14.D[0]
+	MOV		V13.H[3], V14.H[0]
+	MOV		V13.B[5], V14.B[0]
+	B		5b
+
+91:
+	MOV		V13.S[0], V13.S[3]
+	MOV		V13.D[1], V14.D[0]
+	MOV		V13.S[1], V14.S[0]
+	B		5b
+
+92:
+	EXT		V13.16B, V13.16B, V13.16B, #13
+	MOV		V13.D[1], V14.D[0]
+	MOV		V13.S[1], V14.S[0]
+	MOV		V13.B[3], V14.B[0]
+	B		5b
+
+93:
+	MOV		V15.H[0], V13.H[7]
+	MOV		V13.16B, V14.16B
+	MOV		V13.H[0], V15.H[0]
+	B		5b
+
+94:
+	MOV		V15.B[0], V13.B[15]
+	MOV		V13.16B, V14.16B
+	MOV		V13.B[0], V15.B[0]
+	B		5b
+
+95:
+	LDR		Q13,=0x0
+	B		5b				// _128_done
+
+	/* _exact_16_left */
+10:
+	LD1		{ V13.2D }, [X1], #0x10
+
+	REV64		V13.16B, V13.16B
+	EXT		V13.16B, V13.16B, V13.16B, #8
+	EOR		V13.16B, V13.16B, V10.16B
+	B		5b				// _128_done
+
+	/* _only_less_than_4 */
+13:	CMP		X2, #3
+	MOVI		D14, #0
+	B.LT		17f				//_only_less_than_3
+
+	LDR		S13, [X1], #4
+	MOV		 V13.B[15], V13.B[0]
+	MOV		V13.B[14], V13.B[1]
+	MOV		V13.B[13], V13.B[2]
+	MOV		V13.S[0], V13.S[1]
+
+	EOR		V13.16B, V13.16B, V10.16B
+
+	EXT		V13.16B, V13.16B, V13.16B, #5
+
+	MOV		V13.S[3], V14.S[0]
+	MOV		V13.B[11], V14.B[0]
+
+	B		7b				// _barrett
+	/* _only_less_than_3 */
+17:
+	CMP		X2, #2
+	B.LT		18f				// _only_less_than_2
+
+	LDR		H13, [X1], #2
+	MOV		V13.B[15], V13.B[0]
+	MOV		V13.B[14], V13.B[1]
+	MOV		V13.H[0], V13.H[1]
+
+	EOR		V13.16B, V13.16B, V10.16B
+
+	EXT		V13.16B, V13.16B, V13.16B, #6
+	MOV		V13.S[3], V14.S[0]
+	MOV		V13.H[5], V14.H[0]
+
+	B		7b				// _barrett
+
+	/* _only_less_than_2 */
+18:
+	LDRB		W7, [X1], #1
+	LDR		Q13, = 0x0
+	MOV		V13.B[15], W7
+
+	EOR		V13.16B, V13.16B, V10.16B
+
+	EXT		V13.16B, V13.16B, V13.16B, #7
+	MOV		V13.S[3], V14.S[0]
+	MOV		V13.H[5], V14.H[0]
+	MOV		V13.B[9], V14.B[0]
+
+	B		7b				// _barrett
diff --git a/arch/arm64/crypto/crct10dif-neon_glue.c b/arch/arm64/crypto/crct10dif-neon_glue.c
new file mode 100644
index 0000000..a6d8c5c
--- /dev/null
+++ b/arch/arm64/crypto/crct10dif-neon_glue.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/crc-t10dif.h>
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+
+asmlinkage __u16 crc_t10dif_neon(__u16 crc, const unsigned char *buf,
+				size_t len);
+
+struct chksum_desc_ctx {
+	__u16 crc;
+};
+
+/*
+ * Steps through buffer one byte at at time, calculates reflected
+ * crc using table.
+ */
+
+static int chksum_init(struct shash_desc *desc)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = 0;
+
+	return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data,
+			 unsigned int length)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	ctx->crc = crc_t10dif_neon(ctx->crc, data, length);
+	return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	*(__u16 *)out = ctx->crc;
+	return 0;
+}
+
+static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
+			u8 *out)
+{
+	*(__u16 *)out = crc_t10dif_neon(*crcp, data, len);
+	return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data,
+			unsigned int len, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksum_finup(&ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data,
+			 unsigned int length, u8 *out)
+{
+	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	return __chksum_finup(&ctx->crc, data, length, out);
+}
+
+static struct shash_alg alg = {
+	.digestsize		=	CRC_T10DIF_DIGEST_SIZE,
+	.init		=	chksum_init,
+	.update		=	chksum_update,
+	.final		=	chksum_final,
+	.finup		=	chksum_finup,
+	.digest		=	chksum_digest,
+	.descsize		=	sizeof(struct chksum_desc_ctx),
+	.base			=	{
+		.cra_name		=	"crct10dif",
+		.cra_driver_name	=	"crct10dif-neon",
+		.cra_priority		=	200,
+		.cra_blocksize		=	CRC_T10DIF_BLOCK_SIZE,
+		.cra_module		=	THIS_MODULE,
+	}
+};
+
+static int __init crct10dif_arm64_mod_init(void)
+{
+	return crypto_register_shash(&alg);
+}
+
+static void __exit crct10dif_arm64_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(crct10dif_arm64_mod_init);
+module_exit(crct10dif_arm64_mod_fini);
+
+MODULE_AUTHOR("YueHaibing <yuehaibing@huawei.com>");
+MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with ARM64 NEON instruction.");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS_CRYPTO("crct10dif");
+MODULE_ALIAS_CRYPTO("crct10dif-neon");
-- 
2.7.0

^ permalink raw reply related

* [PATCH] crypto: powerpc - Rename CRYPT_CRC32C_VPMSUM option
From: Jean Delvare @ 2016-11-22  9:32 UTC (permalink / raw)
  To: linux-crypto
  Cc: Herbert Xu, Anton Blanchard, Benjamin Herrenschmidt,
	Paul Mackerras, Michael Ellerman, David S. Miller

For consistency with the other 246 kernel configuration options,
rename CRYPT_CRC32C_VPMSUM to CRYPTO_CRC32C_VPMSUM.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Cc: Anton Blanchard <anton@samba.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/powerpc/crypto/Makefile |    2 +-
 crypto/Kconfig               |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

--- linux-4.9-rc5.orig/arch/powerpc/crypto/Makefile	2016-10-03 01:24:33.000000000 +0200
+++ linux-4.9-rc5/arch/powerpc/crypto/Makefile	2016-11-22 10:24:49.902659542 +0100
@@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.
 obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
 obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
 obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
-obj-$(CONFIG_CRYPT_CRC32C_VPMSUM) += crc32c-vpmsum.o
+obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
 
 aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
 md5-ppc-y := md5-asm.o md5-glue.o
--- linux-4.9-rc5.orig/crypto/Kconfig	2016-10-03 01:24:33.000000000 +0200
+++ linux-4.9-rc5/crypto/Kconfig	2016-11-22 10:25:03.270801004 +0100
@@ -437,7 +437,7 @@ config CRYPTO_CRC32C_INTEL
 	  gain performance compared with software implementation.
 	  Module will be crc32c-intel.
 
-config CRYPT_CRC32C_VPMSUM
+config CRYPTO_CRC32C_VPMSUM
 	tristate "CRC32c CRC algorithm (powerpc64)"
 	depends on PPC64 && ALTIVEC
 	select CRYPTO_HASH


-- 
Jean Delvare
SUSE L3 Support

^ permalink raw reply

* [PATCH v2 1/2] virtio: introduce little edian functions for virtio_cread/write# family
From: Gonglei @ 2016-11-22  8:10 UTC (permalink / raw)
  To: linux-kernel, qemu-devel, virtio-dev, virtualization,
	linux-crypto
  Cc: luonengjun, mst, stefanha, weidong.huang, wu.wubin, xin.zeng,
	claudio.fontana, herbert, pasic, davem, jianjay.zhou, hanweidong,
	arei.gonglei, cornelia.huck, xuquan8, longpeng2,
	salvatore.benedetto, Gonglei
In-Reply-To: <1479802223-121104-1-git-send-email-arei.gonglei@huawei.com>

Virtio modern devices are always little edian, let's introduce
the LE functions for read/write configuration space for
virtio modern devices, which avoid complaint by Sparse when
we use the virtio_creaed/virtio_cwrite in VIRTIO_1 devices.

Signed-off-by: Gonglei <arei.gonglei@huawei.com>
---
 include/linux/virtio_config.h | 45 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 26c155b..de05707 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -414,4 +414,49 @@ static inline void virtio_cwrite64(struct virtio_device *vdev,
 		_r;							\
 	})
 
+static inline __le16 virtio_cread16_le(struct virtio_device *vdev,
+				 unsigned int offset)
+{
+	__le16 ret;
+
+	vdev->config->get(vdev, offset, &ret, sizeof(ret));
+	return ret;
+}
+
+static inline void virtio_cwrite16_le(struct virtio_device *vdev,
+				   unsigned int offset, __le16 val)
+{
+	vdev->config->set(vdev, offset, &val, sizeof(val));
+}
+
+static inline __le32 virtio_cread32_le(struct virtio_device *vdev,
+				 unsigned int offset)
+{
+	__le32 ret;
+
+	vdev->config->get(vdev, offset, &ret, sizeof(ret));
+	return ret;
+}
+
+static inline void virtio_cwrite32_le(struct virtio_device *vdev,
+				   unsigned int offset, __le32 val)
+{
+	vdev->config->set(vdev, offset, &val, sizeof(val));
+}
+
+static inline __le64 virtio_cread64_le(struct virtio_device *vdev,
+				 unsigned int offset)
+{
+	__le64 ret;
+
+	__virtio_cread_many(vdev, offset, &ret, 1, sizeof(ret));
+	return ret;
+}
+
+static inline void virtio_cwrite64_le(struct virtio_device *vdev,
+				   unsigned int offset, __le64 val)
+{
+	vdev->config->set(vdev, offset, &val, sizeof(val));
+}
+
 #endif /* _LINUX_VIRTIO_CONFIG_H */
-- 
1.8.3.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox