* [PATCH 1/1] crypto: atmel-aes: fix compiler error when VERBOSE_DEBUG is defined
From: Cyrille Pitchen @ 2016-09-29 16:46 UTC (permalink / raw)
To: herbert, davem, nicolas.ferre, levent.demir
Cc: linux-crypto, linux-kernel, linux-arm-kernel, Cyrille Pitchen
This patch fixes a compiler error when VERBOSE_DEBUG is defined: in
atmel_aes_write(), the call to atmel_aes_reg_name() was missing its 3rd
argument, the size of the temporary buffer.
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Reported-by: Levent Demir <levent.demir@inria.fr>
---
drivers/crypto/atmel-aes.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index e3d40a8dfffb..1d9e7bd3f377 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -317,7 +317,7 @@ static inline void atmel_aes_write(struct atmel_aes_dev *dd,
char tmp[16];
dev_vdbg(dd->dev, "write 0x%08x into %s\n", value,
- atmel_aes_reg_name(offset, tmp));
+ atmel_aes_reg_name(offset, tmp, sizeof(tmp)));
}
#endif /* VERBOSE_DEBUG */
--
2.7.4
^ permalink raw reply related
* [PATCH 1/1] crypto: atmel-aes: add support to the XTS mode
From: Cyrille Pitchen @ 2016-09-29 16:49 UTC (permalink / raw)
To: herbert, davem, nicolas.ferre
Cc: linux-crypto, linux-kernel, linux-arm-kernel, levent.demir,
Cyrille Pitchen
This patch adds the xts(aes) algorithm, which is supported from
hardware version 0x500 and above (sama5d2x).
Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
drivers/crypto/atmel-aes-regs.h | 4 +
drivers/crypto/atmel-aes.c | 186 ++++++++++++++++++++++++++++++++++++++--
2 files changed, 184 insertions(+), 6 deletions(-)
diff --git a/drivers/crypto/atmel-aes-regs.h b/drivers/crypto/atmel-aes-regs.h
index 6c2951bb70b1..0ec04407b533 100644
--- a/drivers/crypto/atmel-aes-regs.h
+++ b/drivers/crypto/atmel-aes-regs.h
@@ -28,6 +28,7 @@
#define AES_MR_OPMOD_CFB (0x3 << 12)
#define AES_MR_OPMOD_CTR (0x4 << 12)
#define AES_MR_OPMOD_GCM (0x5 << 12)
+#define AES_MR_OPMOD_XTS (0x6 << 12)
#define AES_MR_LOD (0x1 << 15)
#define AES_MR_CFBS_MASK (0x7 << 16)
#define AES_MR_CFBS_128b (0x0 << 16)
@@ -67,6 +68,9 @@
#define AES_CTRR 0x98
#define AES_GCMHR(x) (0x9c + ((x) * 0x04))
+#define AES_TWR(x) (0xc0 + ((x) * 0x04))
+#define AES_ALPHAR(x) (0xd0 + ((x) * 0x04))
+
#define AES_HW_VERSION 0xFC
#endif /* __ATMEL_AES_REGS_H__ */
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 1d9e7bd3f377..b14c10e98a06 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -68,6 +68,7 @@
#define AES_FLAGS_CFB8 (AES_MR_OPMOD_CFB | AES_MR_CFBS_8b)
#define AES_FLAGS_CTR AES_MR_OPMOD_CTR
#define AES_FLAGS_GCM AES_MR_OPMOD_GCM
+#define AES_FLAGS_XTS AES_MR_OPMOD_XTS
#define AES_FLAGS_MODE_MASK (AES_FLAGS_OPMODE_MASK | \
AES_FLAGS_ENCRYPT | \
@@ -89,6 +90,7 @@ struct atmel_aes_caps {
bool has_cfb64;
bool has_ctr32;
bool has_gcm;
+ bool has_xts;
u32 max_burst_size;
};
@@ -135,6 +137,12 @@ struct atmel_aes_gcm_ctx {
atmel_aes_fn_t ghash_resume;
};
+struct atmel_aes_xts_ctx {
+ struct atmel_aes_base_ctx base;
+
+ u32 key2[AES_KEYSIZE_256 / sizeof(u32)];
+};
+
struct atmel_aes_reqctx {
unsigned long mode;
};
@@ -282,6 +290,20 @@ static const char *atmel_aes_reg_name(u32 offset, char *tmp, size_t sz)
snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2);
break;
+ case AES_TWR(0):
+ case AES_TWR(1):
+ case AES_TWR(2):
+ case AES_TWR(3):
+ snprintf(tmp, sz, "TWR[%u]", (offset - AES_TWR(0)) >> 2);
+ break;
+
+ case AES_ALPHAR(0):
+ case AES_ALPHAR(1):
+ case AES_ALPHAR(2):
+ case AES_ALPHAR(3):
+ snprintf(tmp, sz, "ALPHAR[%u]", (offset - AES_ALPHAR(0)) >> 2);
+ break;
+
default:
snprintf(tmp, sz, "0x%02x", offset);
break;
@@ -453,15 +475,15 @@ static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err)
return err;
}
-static void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma,
- const u32 *iv)
+static void atmel_aes_write_ctrl_key(struct atmel_aes_dev *dd, bool use_dma,
+ const u32 *iv, const u32 *key, int keylen)
{
u32 valmr = 0;
/* MR register must be set before IV registers */
- if (dd->ctx->keylen == AES_KEYSIZE_128)
+ if (keylen == AES_KEYSIZE_128)
valmr |= AES_MR_KEYSIZE_128;
- else if (dd->ctx->keylen == AES_KEYSIZE_192)
+ else if (keylen == AES_KEYSIZE_192)
valmr |= AES_MR_KEYSIZE_192;
else
valmr |= AES_MR_KEYSIZE_256;
@@ -478,13 +500,19 @@ static void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma,
atmel_aes_write(dd, AES_MR, valmr);
- atmel_aes_write_n(dd, AES_KEYWR(0), dd->ctx->key,
- SIZE_IN_WORDS(dd->ctx->keylen));
+ atmel_aes_write_n(dd, AES_KEYWR(0), key, SIZE_IN_WORDS(keylen));
if (iv && (valmr & AES_MR_OPMOD_MASK) != AES_MR_OPMOD_ECB)
atmel_aes_write_block(dd, AES_IVR(0), iv);
}
+static inline void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma,
+ const u32 *iv)
+
+{
+ atmel_aes_write_ctrl_key(dd, use_dma, iv,
+ dd->ctx->key, dd->ctx->keylen);
+}
/* CPU transfer */
@@ -1769,6 +1797,139 @@ static struct aead_alg aes_gcm_alg = {
};
+/* xts functions */
+
+static inline struct atmel_aes_xts_ctx *
+atmel_aes_xts_ctx_cast(struct atmel_aes_base_ctx *ctx)
+{
+ return container_of(ctx, struct atmel_aes_xts_ctx, base);
+}
+
+static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd);
+
+static int atmel_aes_xts_start(struct atmel_aes_dev *dd)
+{
+ struct atmel_aes_xts_ctx *ctx = atmel_aes_xts_ctx_cast(dd->ctx);
+ struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
+ struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+ unsigned long flags;
+ int err;
+
+ atmel_aes_set_mode(dd, rctx);
+
+ err = atmel_aes_hw_init(dd);
+ if (err)
+ return atmel_aes_complete(dd, err);
+
+ /* Compute the tweak value from req->info with ecb(aes). */
+ flags = dd->flags;
+ dd->flags &= ~AES_FLAGS_MODE_MASK;
+ dd->flags |= (AES_FLAGS_ECB | AES_FLAGS_ENCRYPT);
+ atmel_aes_write_ctrl_key(dd, false, NULL,
+ ctx->key2, ctx->base.keylen);
+ dd->flags = flags;
+
+ atmel_aes_write_block(dd, AES_IDATAR(0), req->info);
+ return atmel_aes_wait_for_data_ready(dd, atmel_aes_xts_process_data);
+}
+
+static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd)
+{
+ struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
+ bool use_dma = (req->nbytes >= ATMEL_AES_DMA_THRESHOLD);
+ u32 tweak[AES_BLOCK_SIZE / sizeof(u32)];
+ static const u32 one[AES_BLOCK_SIZE / sizeof(u32)] = {cpu_to_le32(1), };
+ u8 *tweak_bytes = (u8 *)tweak;
+ int i;
+
+ /* Read the computed ciphered tweak value. */
+ atmel_aes_read_block(dd, AES_ODATAR(0), tweak);
+ /*
+ * Hardware quirk:
+ * the order of the ciphered tweak bytes needs to be reversed before
+ * writing them into the TWRx registers.
+ */
+ for (i = 0; i < AES_BLOCK_SIZE/2; ++i) {
+ u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i];
+
+ tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i];
+ tweak_bytes[i] = tmp;
+ }
+
+ /* Process the data. */
+ atmel_aes_write_ctrl(dd, use_dma, NULL);
+ atmel_aes_write_block(dd, AES_TWR(0), tweak);
+ atmel_aes_write_block(dd, AES_ALPHAR(0), one);
+ if (use_dma)
+ return atmel_aes_dma_start(dd, req->src, req->dst, req->nbytes,
+ atmel_aes_transfer_complete);
+
+ return atmel_aes_cpu_start(dd, req->src, req->dst, req->nbytes,
+ atmel_aes_transfer_complete);
+}
+
+static int atmel_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct atmel_aes_xts_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+ if (keylen != AES_KEYSIZE_128 * 2 &&
+ keylen != AES_KEYSIZE_192 * 2 &&
+ keylen != AES_KEYSIZE_256 * 2) {
+ crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(ctx->base.key, key, keylen/2);
+ memcpy(ctx->key2, key + keylen/2, keylen/2);
+ ctx->base.keylen = keylen/2;
+
+ return 0;
+}
+
+static int atmel_aes_xts_encrypt(struct ablkcipher_request *req)
+{
+ return atmel_aes_crypt(req, AES_FLAGS_XTS | AES_FLAGS_ENCRYPT);
+}
+
+static int atmel_aes_xts_decrypt(struct ablkcipher_request *req)
+{
+ return atmel_aes_crypt(req, AES_FLAGS_XTS);
+}
+
+static int atmel_aes_xts_cra_init(struct crypto_tfm *tfm)
+{
+ struct atmel_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_aes_reqctx);
+ ctx->base.start = atmel_aes_xts_start;
+
+ return 0;
+}
+
+static struct crypto_alg aes_xts_alg = {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "atmel-xts-aes",
+ .cra_priority = ATMEL_AES_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct atmel_aes_xts_ctx),
+ .cra_alignmask = 0xf,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = atmel_aes_xts_cra_init,
+ .cra_exit = atmel_aes_cra_exit,
+ .cra_u.ablkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = atmel_aes_xts_setkey,
+ .encrypt = atmel_aes_xts_encrypt,
+ .decrypt = atmel_aes_xts_decrypt,
+ }
+};
+
+
/* Probe functions */
static int atmel_aes_buff_init(struct atmel_aes_dev *dd)
@@ -1877,6 +2038,9 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd)
{
int i;
+ if (dd->caps.has_xts)
+ crypto_unregister_alg(&aes_xts_alg);
+
if (dd->caps.has_gcm)
crypto_unregister_aead(&aes_gcm_alg);
@@ -1909,8 +2073,16 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd)
goto err_aes_gcm_alg;
}
+ if (dd->caps.has_xts) {
+ err = crypto_register_alg(&aes_xts_alg);
+ if (err)
+ goto err_aes_xts_alg;
+ }
+
return 0;
+err_aes_xts_alg:
+ crypto_unregister_aead(&aes_gcm_alg);
err_aes_gcm_alg:
crypto_unregister_alg(&aes_cfb64_alg);
err_aes_cfb64_alg:
@@ -1928,6 +2100,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
dd->caps.has_cfb64 = 0;
dd->caps.has_ctr32 = 0;
dd->caps.has_gcm = 0;
+ dd->caps.has_xts = 0;
dd->caps.max_burst_size = 1;
/* keep only major version number */
@@ -1937,6 +2110,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
dd->caps.has_cfb64 = 1;
dd->caps.has_ctr32 = 1;
dd->caps.has_gcm = 1;
+ dd->caps.has_xts = 1;
dd->caps.max_burst_size = 4;
break;
case 0x200:
--
2.7.4
^ permalink raw reply related
* [PATCH] crypto: caam - treat SGT address pointer as u64
From: Tudor Ambarus @ 2016-09-29 14:17 UTC (permalink / raw)
To: horia.geanta, herbert; +Cc: linux-crypto, fabio.estevam, Tudor Ambarus
Even for i.MX, CAAM is able to use address pointers greater than
32 bits, the address pointer field being interpreted as a double word.
Enforce u64 address pointer in the sec4_sg_entry struct.
This patch fixes the SGT address pointer endianness issue for
32bit platforms where core endianness != caam endianness.
Signed-off-by: Tudor Ambarus <tudor-dan.ambarus@nxp.com>
---
drivers/crypto/caam/desc.h | 6 ------
drivers/crypto/caam/regs.h | 8 ++++++++
drivers/crypto/caam/sg_sw_sec4.h | 2 +-
3 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index 26427c1..513b664 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -23,13 +23,7 @@
#define SEC4_SG_OFFSET_MASK 0x00001fff
struct sec4_sg_entry {
-#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \
- defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX)
- u32 rsvd1;
- dma_addr_t ptr;
-#else
u64 ptr;
-#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */
u32 len;
u32 bpid_offset;
};
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index b3c5016..effbdd8 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -196,6 +196,14 @@ static inline u64 rd_reg64(void __iomem *reg)
#define caam_dma_to_cpu(value) caam32_to_cpu(value)
#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
+#ifdef CONFIG_SOC_IMX7D
+#define cpu_to_caam_dma64(value) \
+ (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
+ (u64)cpu_to_caam32(upper_32_bits(value)))
+#else
+#define cpu_to_caam_dma64(value) cpu_to_caam64(value)
+#endif
+
/*
* jr_outentry
* Represents each entry in a JobR output ring
diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h
index 19dc64f..41cd5a3 100644
--- a/drivers/crypto/caam/sg_sw_sec4.h
+++ b/drivers/crypto/caam/sg_sw_sec4.h
@@ -15,7 +15,7 @@ struct sec4_sg_entry;
static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
dma_addr_t dma, u32 len, u16 offset)
{
- sec4_sg_ptr->ptr = cpu_to_caam_dma(dma);
+ sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);
sec4_sg_ptr->len = cpu_to_caam32(len);
sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK);
#ifdef DEBUG
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH] crypto: caam - treat SGT address pointer as u64
From: Fabio Estevam @ 2016-09-29 16:58 UTC (permalink / raw)
To: Tudor Ambarus; +Cc: horia.geanta, Herbert Xu, linux-crypto, Fabio Estevam
In-Reply-To: <1475158647-16094-1-git-send-email-tudor-dan.ambarus@nxp.com>
Hi Tudor,
On Thu, Sep 29, 2016 at 11:17 AM, Tudor Ambarus
<tudor-dan.ambarus@nxp.com> wrote:
> diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
> index b3c5016..effbdd8 100644
> --- a/drivers/crypto/caam/regs.h
> +++ b/drivers/crypto/caam/regs.h
> @@ -196,6 +196,14 @@ static inline u64 rd_reg64(void __iomem *reg)
> #define caam_dma_to_cpu(value) caam32_to_cpu(value)
> #endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
>
> +#ifdef CONFIG_SOC_IMX7D
Why is this restricted to mx7d?
^ permalink raw reply
* Re: [PATCH 1/1] crypto: atmel-aes: add support to the XTS mode
From: Stephan Mueller @ 2016-09-29 17:44 UTC (permalink / raw)
To: Cyrille Pitchen
Cc: herbert, davem, nicolas.ferre, linux-crypto, linux-kernel,
linux-arm-kernel, levent.demir
In-Reply-To: <4bf386be2805a97c59defcd24ee9fb56f190b901.1475167690.git.cyrille.pitchen@atmel.com>
Am Donnerstag, 29. September 2016, 18:49:07 CEST schrieb Cyrille Pitchen:
Hi Cyrille,
> This patch adds the xts(aes) algorithm, which is supported from
> hardware version 0x500 and above (sama5d2x).
>
> Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
> ---
> drivers/crypto/atmel-aes-regs.h | 4 +
> drivers/crypto/atmel-aes.c | 186
> ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 184
> insertions(+), 6 deletions(-)
>
> diff --git a/drivers/crypto/atmel-aes-regs.h
> b/drivers/crypto/atmel-aes-regs.h index 6c2951bb70b1..0ec04407b533 100644
> --- a/drivers/crypto/atmel-aes-regs.h
> +++ b/drivers/crypto/atmel-aes-regs.h
> @@ -28,6 +28,7 @@
> #define AES_MR_OPMOD_CFB (0x3 << 12)
> #define AES_MR_OPMOD_CTR (0x4 << 12)
> #define AES_MR_OPMOD_GCM (0x5 << 12)
> +#define AES_MR_OPMOD_XTS (0x6 << 12)
> #define AES_MR_LOD (0x1 << 15)
> #define AES_MR_CFBS_MASK (0x7 << 16)
> #define AES_MR_CFBS_128b (0x0 << 16)
> @@ -67,6 +68,9 @@
> #define AES_CTRR 0x98
> #define AES_GCMHR(x) (0x9c + ((x) * 0x04))
>
> +#define AES_TWR(x) (0xc0 + ((x) * 0x04))
> +#define AES_ALPHAR(x) (0xd0 + ((x) * 0x04))
> +
> #define AES_HW_VERSION 0xFC
>
> #endif /* __ATMEL_AES_REGS_H__ */
> diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
> index 1d9e7bd3f377..b14c10e98a06 100644
> --- a/drivers/crypto/atmel-aes.c
> +++ b/drivers/crypto/atmel-aes.c
> @@ -68,6 +68,7 @@
> #define AES_FLAGS_CFB8 (AES_MR_OPMOD_CFB | AES_MR_CFBS_8b)
> #define AES_FLAGS_CTR AES_MR_OPMOD_CTR
> #define AES_FLAGS_GCM AES_MR_OPMOD_GCM
> +#define AES_FLAGS_XTS AES_MR_OPMOD_XTS
>
> #define AES_FLAGS_MODE_MASK (AES_FLAGS_OPMODE_MASK | \
> AES_FLAGS_ENCRYPT | \
> @@ -89,6 +90,7 @@ struct atmel_aes_caps {
> bool has_cfb64;
> bool has_ctr32;
> bool has_gcm;
> + bool has_xts;
> u32 max_burst_size;
> };
>
> @@ -135,6 +137,12 @@ struct atmel_aes_gcm_ctx {
> atmel_aes_fn_t ghash_resume;
> };
>
> +struct atmel_aes_xts_ctx {
> + struct atmel_aes_base_ctx base;
> +
> + u32 key2[AES_KEYSIZE_256 / sizeof(u32)];
> +};
> +
> struct atmel_aes_reqctx {
> unsigned long mode;
> };
> @@ -282,6 +290,20 @@ static const char *atmel_aes_reg_name(u32 offset, char
> *tmp, size_t sz) snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >>
> 2);
> break;
>
> + case AES_TWR(0):
> + case AES_TWR(1):
> + case AES_TWR(2):
> + case AES_TWR(3):
> + snprintf(tmp, sz, "TWR[%u]", (offset - AES_TWR(0)) >> 2);
> + break;
> +
> + case AES_ALPHAR(0):
> + case AES_ALPHAR(1):
> + case AES_ALPHAR(2):
> + case AES_ALPHAR(3):
> + snprintf(tmp, sz, "ALPHAR[%u]", (offset - AES_ALPHAR(0)) >> 2);
> + break;
> +
> default:
> snprintf(tmp, sz, "0x%02x", offset);
> break;
> @@ -453,15 +475,15 @@ static inline int atmel_aes_complete(struct
> atmel_aes_dev *dd, int err) return err;
> }
>
> -static void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma,
> - const u32 *iv)
> +static void atmel_aes_write_ctrl_key(struct atmel_aes_dev *dd, bool
> use_dma, + const u32 *iv, const u32 *key, int keylen)
> {
> u32 valmr = 0;
>
> /* MR register must be set before IV registers */
> - if (dd->ctx->keylen == AES_KEYSIZE_128)
> + if (keylen == AES_KEYSIZE_128)
> valmr |= AES_MR_KEYSIZE_128;
> - else if (dd->ctx->keylen == AES_KEYSIZE_192)
> + else if (keylen == AES_KEYSIZE_192)
> valmr |= AES_MR_KEYSIZE_192;
> else
> valmr |= AES_MR_KEYSIZE_256;
> @@ -478,13 +500,19 @@ static void atmel_aes_write_ctrl(struct atmel_aes_dev
> *dd, bool use_dma,
>
> atmel_aes_write(dd, AES_MR, valmr);
>
> - atmel_aes_write_n(dd, AES_KEYWR(0), dd->ctx->key,
> - SIZE_IN_WORDS(dd->ctx->keylen));
> + atmel_aes_write_n(dd, AES_KEYWR(0), key, SIZE_IN_WORDS(keylen));
>
> if (iv && (valmr & AES_MR_OPMOD_MASK) != AES_MR_OPMOD_ECB)
> atmel_aes_write_block(dd, AES_IVR(0), iv);
> }
>
> +static inline void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool
> use_dma, + const u32 *iv)
> +
> +{
> + atmel_aes_write_ctrl_key(dd, use_dma, iv,
> + dd->ctx->key, dd->ctx->keylen);
> +}
>
> /* CPU transfer */
>
> @@ -1769,6 +1797,139 @@ static struct aead_alg aes_gcm_alg = {
> };
>
>
> +/* xts functions */
> +
> +static inline struct atmel_aes_xts_ctx *
> +atmel_aes_xts_ctx_cast(struct atmel_aes_base_ctx *ctx)
> +{
> + return container_of(ctx, struct atmel_aes_xts_ctx, base);
> +}
> +
> +static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd);
> +
> +static int atmel_aes_xts_start(struct atmel_aes_dev *dd)
> +{
> + struct atmel_aes_xts_ctx *ctx = atmel_aes_xts_ctx_cast(dd->ctx);
> + struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
> + struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req);
> + unsigned long flags;
> + int err;
> +
> + atmel_aes_set_mode(dd, rctx);
> +
> + err = atmel_aes_hw_init(dd);
> + if (err)
> + return atmel_aes_complete(dd, err);
> +
> + /* Compute the tweak value from req->info with ecb(aes). */
> + flags = dd->flags;
> + dd->flags &= ~AES_FLAGS_MODE_MASK;
> + dd->flags |= (AES_FLAGS_ECB | AES_FLAGS_ENCRYPT);
> + atmel_aes_write_ctrl_key(dd, false, NULL,
> + ctx->key2, ctx->base.keylen);
> + dd->flags = flags;
> +
> + atmel_aes_write_block(dd, AES_IDATAR(0), req->info);
> + return atmel_aes_wait_for_data_ready(dd, atmel_aes_xts_process_data);
> +}
> +
> +static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd)
> +{
> + struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq);
> + bool use_dma = (req->nbytes >= ATMEL_AES_DMA_THRESHOLD);
> + u32 tweak[AES_BLOCK_SIZE / sizeof(u32)];
> + static const u32 one[AES_BLOCK_SIZE / sizeof(u32)] = {cpu_to_le32(1), };
> + u8 *tweak_bytes = (u8 *)tweak;
> + int i;
> +
> + /* Read the computed ciphered tweak value. */
> + atmel_aes_read_block(dd, AES_ODATAR(0), tweak);
> + /*
> + * Hardware quirk:
> + * the order of the ciphered tweak bytes need to be reverted before
> + * writing them into the ODATARx registers.
> + */
> + for (i = 0; i < AES_BLOCK_SIZE/2; ++i) {
> + u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i];
> +
> + tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i];
> + tweak_bytes[i] = tmp;
> + }
> +
> + /* Process the data. */
> + atmel_aes_write_ctrl(dd, use_dma, NULL);
> + atmel_aes_write_block(dd, AES_TWR(0), tweak);
> + atmel_aes_write_block(dd, AES_ALPHAR(0), one);
> + if (use_dma)
> + return atmel_aes_dma_start(dd, req->src, req->dst, req->nbytes,
> + atmel_aes_transfer_complete);
> +
> + return atmel_aes_cpu_start(dd, req->src, req->dst, req->nbytes,
> + atmel_aes_transfer_complete);
> +}
> +
> +static int atmel_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8
> *key, + unsigned int keylen)
> +{
> + struct atmel_aes_xts_ctx *ctx = crypto_ablkcipher_ctx(tfm);
> +
> + if (keylen != AES_KEYSIZE_128 * 2 &&
> + keylen != AES_KEYSIZE_192 * 2 &&
> + keylen != AES_KEYSIZE_256 * 2) {
> + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
> + return -EINVAL;
> + }
Please use xts_check_key as a replacement for this code.
> +
> + memcpy(ctx->base.key, key, keylen/2);
> + memcpy(ctx->key2, key + keylen/2, keylen/2);
> + ctx->base.keylen = keylen/2;
> +
> + return 0;
> +}
> +
> +static int atmel_aes_xts_encrypt(struct ablkcipher_request *req)
> +{
> + return atmel_aes_crypt(req, AES_FLAGS_XTS | AES_FLAGS_ENCRYPT);
> +}
> +
> +static int atmel_aes_xts_decrypt(struct ablkcipher_request *req)
> +{
> + return atmel_aes_crypt(req, AES_FLAGS_XTS);
> +}
> +
> +static int atmel_aes_xts_cra_init(struct crypto_tfm *tfm)
> +{
> + struct atmel_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
> +
> + tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_aes_reqctx);
> + ctx->base.start = atmel_aes_xts_start;
> +
> + return 0;
> +}
> +
> +static struct crypto_alg aes_xts_alg = {
> + .cra_name = "xts(aes)",
> + .cra_driver_name = "atmel-xts-aes",
> + .cra_priority = ATMEL_AES_PRIORITY,
> + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
> + .cra_blocksize = AES_BLOCK_SIZE,
> + .cra_ctxsize = sizeof(struct atmel_aes_xts_ctx),
> + .cra_alignmask = 0xf,
> + .cra_type = &crypto_ablkcipher_type,
> + .cra_module = THIS_MODULE,
> + .cra_init = atmel_aes_xts_cra_init,
> + .cra_exit = atmel_aes_cra_exit,
> + .cra_u.ablkcipher = {
> + .min_keysize = 2 * AES_MIN_KEY_SIZE,
> + .max_keysize = 2 * AES_MAX_KEY_SIZE,
> + .ivsize = AES_BLOCK_SIZE,
> + .setkey = atmel_aes_xts_setkey,
> + .encrypt = atmel_aes_xts_encrypt,
> + .decrypt = atmel_aes_xts_decrypt,
> + }
> +};
> +
> +
> /* Probe functions */
>
> static int atmel_aes_buff_init(struct atmel_aes_dev *dd)
> @@ -1877,6 +2038,9 @@ static void atmel_aes_unregister_algs(struct
> atmel_aes_dev *dd) {
> int i;
>
> + if (dd->caps.has_xts)
> + crypto_unregister_alg(&aes_xts_alg);
> +
> if (dd->caps.has_gcm)
> crypto_unregister_aead(&aes_gcm_alg);
>
> @@ -1909,8 +2073,16 @@ static int atmel_aes_register_algs(struct
> atmel_aes_dev *dd) goto err_aes_gcm_alg;
> }
>
> + if (dd->caps.has_xts) {
> + err = crypto_register_alg(&aes_xts_alg);
> + if (err)
> + goto err_aes_xts_alg;
> + }
> +
> return 0;
>
> +err_aes_xts_alg:
> + crypto_unregister_aead(&aes_gcm_alg);
> err_aes_gcm_alg:
> crypto_unregister_alg(&aes_cfb64_alg);
> err_aes_cfb64_alg:
> @@ -1928,6 +2100,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev
> *dd) dd->caps.has_cfb64 = 0;
> dd->caps.has_ctr32 = 0;
> dd->caps.has_gcm = 0;
> + dd->caps.has_xts = 0;
> dd->caps.max_burst_size = 1;
>
> /* keep only major version number */
> @@ -1937,6 +2110,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev
> *dd) dd->caps.has_cfb64 = 1;
> dd->caps.has_ctr32 = 1;
> dd->caps.has_gcm = 1;
> + dd->caps.has_xts = 1;
> dd->caps.max_burst_size = 4;
> break;
> case 0x200:
Ciao
Stephan
^ permalink raw reply
* [PATCH] arm64: add support for SHA256 using NEON instructions
From: Ard Biesheuvel @ 2016-09-29 22:51 UTC (permalink / raw)
To: linux-arm-kernel, linux-crypto, herbert
Cc: appro, victor.chong, daniel.thompson, will.deacon,
catalin.marinas, Ard Biesheuvel
This is a port of the ARMv7 implementation in arch/arm/crypto. For a Cortex-A57
(r2p1), the performance numbers are listed below. In summary, 40% - 50% speedup
where it counts, i.e., block sizes over 256 bytes with few updates.
testing speed of async sha256 (sha256-generic)
( 16 byte blocks, 16 bytes x 1 updates): 1379992 ops/s, 22079872 Bps
( 64 byte blocks, 16 bytes x 4 updates): 633455 ops/s, 40541120 Bps
( 64 byte blocks, 64 bytes x 1 updates): 738076 ops/s, 47236864 Bps
( 256 byte blocks, 16 bytes x 16 updates): 234420 ops/s, 60011520 Bps
( 256 byte blocks, 64 bytes x 4 updates): 293008 ops/s, 75010048 Bps
( 256 byte blocks, 256 bytes x 1 updates): 309600 ops/s, 79257600 Bps
( 1024 byte blocks, 16 bytes x 64 updates): 66997 ops/s, 68604928 Bps
( 1024 byte blocks, 256 bytes x 4 updates): 91912 ops/s, 94117888 Bps
( 1024 byte blocks, 1024 bytes x 1 updates): 93992 ops/s, 96247808 Bps
( 2048 byte blocks, 16 bytes x 128 updates): 34385 ops/s, 70420480 Bps
( 2048 byte blocks, 256 bytes x 8 updates): 47570 ops/s, 97423360 Bps
( 2048 byte blocks, 1024 bytes x 2 updates): 48557 ops/s, 99444736 Bps
( 2048 byte blocks, 2048 bytes x 1 updates): 48781 ops/s, 99903488 Bps
( 4096 byte blocks, 16 bytes x 256 updates): 17401 ops/s, 71274496 Bps
( 4096 byte blocks, 256 bytes x 16 updates): 24211 ops/s, 99168256 Bps
( 4096 byte blocks, 1024 bytes x 4 updates): 24720 ops/s, 101253120 Bps
( 4096 byte blocks, 4096 bytes x 1 updates): 24930 ops/s, 102113280 Bps
( 8192 byte blocks, 16 bytes x 512 updates): 8738 ops/s, 71581696 Bps
( 8192 byte blocks, 256 bytes x 32 updates): 12214 ops/s, 100057088 Bps
( 8192 byte blocks, 1024 bytes x 8 updates): 12474 ops/s, 102187008 Bps
( 8192 byte blocks, 4096 bytes x 2 updates): 12558 ops/s, 102875136 Bps
( 8192 byte blocks, 8192 bytes x 1 updates): 12555 ops/s, 102850560 Bps
testing speed of async sha256 (sha256-neon)
( 16 byte blocks, 16 bytes x 1 updates): 1802881 ops/s, 28846096 Bps
( 64 byte blocks, 16 bytes x 4 updates): 744861 ops/s, 47671104 Bps
( 64 byte blocks, 64 bytes x 1 updates): 1015413 ops/s, 64986432 Bps
( 256 byte blocks, 16 bytes x 16 updates): 281055 ops/s, 71950080 Bps
( 256 byte blocks, 64 bytes x 4 updates): 378437 ops/s, 96879872 Bps
( 256 byte blocks, 256 bytes x 1 updates): 453325 ops/s, 116051200 Bps
( 1024 byte blocks, 16 bytes x 64 updates): 79809 ops/s, 81724416 Bps
( 1024 byte blocks, 256 bytes x 4 updates): 131621 ops/s, 134779904 Bps
( 1024 byte blocks, 1024 bytes x 1 updates): 140708 ops/s, 144084992 Bps
( 2048 byte blocks, 16 bytes x 128 updates): 40900 ops/s, 83763200 Bps
( 2048 byte blocks, 256 bytes x 8 updates): 68348 ops/s, 139976704 Bps
( 2048 byte blocks, 1024 bytes x 2 updates): 72051 ops/s, 147560448 Bps
( 2048 byte blocks, 2048 bytes x 1 updates): 73358 ops/s, 150237184 Bps
( 4096 byte blocks, 16 bytes x 256 updates): 20746 ops/s, 84975616 Bps
( 4096 byte blocks, 256 bytes x 16 updates): 34842 ops/s, 142712832 Bps
( 4096 byte blocks, 1024 bytes x 4 updates): 36794 ops/s, 150708224 Bps
( 4096 byte blocks, 4096 bytes x 1 updates): 37422 ops/s, 153280512 Bps
( 8192 byte blocks, 16 bytes x 512 updates): 10428 ops/s, 85426176 Bps
( 8192 byte blocks, 256 bytes x 32 updates): 17600 ops/s, 144179200 Bps
( 8192 byte blocks, 1024 bytes x 8 updates): 18594 ops/s, 152322048 Bps
( 8192 byte blocks, 4096 bytes x 2 updates): 18858 ops/s, 154484736 Bps
( 8192 byte blocks, 8192 bytes x 1 updates): 18880 ops/s, 154664960 Bps
testing speed of async sha256 (sha256-ce)
( 16 byte blocks, 16 bytes x 1 updates): 4107417 ops/s, 65718672 Bps
( 64 byte blocks, 16 bytes x 4 updates): 1418054 ops/s, 90755456 Bps
( 64 byte blocks, 64 bytes x 1 updates): 3323045 ops/s, 212674880 Bps
( 256 byte blocks, 16 bytes x 16 updates): 450084 ops/s, 115221504 Bps
( 256 byte blocks, 64 bytes x 4 updates): 1034376 ops/s, 264800256 Bps
( 256 byte blocks, 256 bytes x 1 updates): 1798744 ops/s, 460478464 Bps
( 1024 byte blocks, 16 bytes x 64 updates): 121411 ops/s, 124324864 Bps
( 1024 byte blocks, 256 bytes x 4 updates): 506086 ops/s, 518232064 Bps
( 1024 byte blocks, 1024 bytes x 1 updates): 634485 ops/s, 649712640 Bps
( 2048 byte blocks, 16 bytes x 128 updates): 61520 ops/s, 125992960 Bps
( 2048 byte blocks, 256 bytes x 8 updates): 266787 ops/s, 546379776 Bps
( 2048 byte blocks, 1024 bytes x 2 updates): 316910 ops/s, 649031680 Bps
( 2048 byte blocks, 2048 bytes x 1 updates): 342777 ops/s, 702007296 Bps
( 4096 byte blocks, 16 bytes x 256 updates): 31003 ops/s, 126988288 Bps
( 4096 byte blocks, 256 bytes x 16 updates): 138097 ops/s, 565645312 Bps
( 4096 byte blocks, 1024 bytes x 4 updates): 164319 ops/s, 673050624 Bps
( 4096 byte blocks, 4096 bytes x 1 updates): 176310 ops/s, 722165760 Bps
( 8192 byte blocks, 16 bytes x 512 updates): 15566 ops/s, 127516672 Bps
( 8192 byte blocks, 256 bytes x 32 updates): 69608 ops/s, 570228736 Bps
( 8192 byte blocks, 1024 bytes x 8 updates): 83682 ops/s, 685522944 Bps
( 8192 byte blocks, 4096 bytes x 2 updates): 88813 ops/s, 727556096 Bps
( 8192 byte blocks, 8192 bytes x 1 updates): 88781 ops/s, 727293952 Bps
Ard Biesheuvel (1):
crypto: arm64/sha256 - add support for SHA256 using NEON instructions
arch/arm64/crypto/Kconfig | 5 +
arch/arm64/crypto/Makefile | 11 +
arch/arm64/crypto/sha256-armv4.pl | 413 +++++++++
arch/arm64/crypto/sha256-core.S_shipped | 883 ++++++++++++++++++++
arch/arm64/crypto/sha256_neon_glue.c | 103 +++
5 files changed, 1415 insertions(+)
create mode 100644 arch/arm64/crypto/sha256-armv4.pl
create mode 100644 arch/arm64/crypto/sha256-core.S_shipped
create mode 100644 arch/arm64/crypto/sha256_neon_glue.c
--
2.7.4
^ permalink raw reply
* [PATCH] crypto: arm64/sha256 - add support for SHA256 using NEON instructions
From: Ard Biesheuvel @ 2016-09-29 22:51 UTC (permalink / raw)
To: linux-arm-kernel, linux-crypto, herbert
Cc: appro, victor.chong, daniel.thompson, will.deacon,
catalin.marinas, Ard Biesheuvel
In-Reply-To: <1475189503-9175-1-git-send-email-ard.biesheuvel@linaro.org>
This is a port to arm64 of the NEON implementation of SHA256 that lives
under arch/arm/crypto.
Because the AArch64 assembler dialect deviates from the 32-bit ARM one
in ways that make sharing code problematic, and given that this port
only uses the NEON code path whereas the original implementation
supports plain ALU assembler, NEON and Crypto Extensions, this code is
built from a version of sha256-armv4.pl that has been transliterated
to the AArch64 NEON dialect.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/Kconfig | 5 +
arch/arm64/crypto/Makefile | 11 +
arch/arm64/crypto/sha256-armv4.pl | 413 +++++++++
arch/arm64/crypto/sha256-core.S_shipped | 883 ++++++++++++++++++++
arch/arm64/crypto/sha256_neon_glue.c | 103 +++
5 files changed, 1415 insertions(+)
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 2cf32e9887e1..d32371198474 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -18,6 +18,11 @@ config CRYPTO_SHA2_ARM64_CE
depends on ARM64 && KERNEL_MODE_NEON
select CRYPTO_HASH
+config CRYPTO_SHA2_ARM64_NEON
+ tristate "SHA-224/SHA-256 digest algorithm (ARMv8 NEON)"
+ depends on ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_HASH
+
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index abb79b3cfcfe..5156ebee0488 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -29,6 +29,9 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o
obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
aes-neon-blk-y := aes-glue-neon.o aes-neon.o
+obj-$(CONFIG_CRYPTO_SHA2_ARM64_NEON) := sha256-neon.o
+sha256-neon-y := sha256_neon_glue.o sha256-core.o
+
AFLAGS_aes-ce.o := -DINTERLEAVE=4
AFLAGS_aes-neon.o := -DINTERLEAVE=4
@@ -40,3 +43,11 @@ CFLAGS_crc32-arm64.o := -mcpu=generic+crc
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
$(call if_changed_rule,cc_o_c)
+
+quiet_cmd_perl = PERL $@
+ cmd_perl = $(PERL) $(<) > $(@)
+
+$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
+ $(call cmd,perl)
+
+.PRECIOUS: $(obj)/sha256-core.S
diff --git a/arch/arm64/crypto/sha256-armv4.pl b/arch/arm64/crypto/sha256-armv4.pl
new file mode 100644
index 000000000000..9ff788339b1c
--- /dev/null
+++ b/arch/arm64/crypto/sha256-armv4.pl
@@ -0,0 +1,413 @@
+#!/usr/bin/env perl
+
+#
+# AArch64 port of the OpenSSL SHA256 implementation for ARM NEON
+#
+# Copyright (c) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
+# ====================================================================
+
+# SHA256 block procedure for ARMv4. May 2007.
+
+# Performance is ~2x better than gcc 3.4 generated code and in "abso-
+# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
+# byte [on single-issue Xscale PXA250 core].
+
+# July 2010.
+#
+# Rescheduling for dual-issue pipeline resulted in 22% improvement on
+# Cortex A8 core and ~20 cycles per processed byte.
+
+# February 2011.
+#
+# Profiler-assisted and platform-specific optimization resulted in 16%
+# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
+
+# September 2013.
+#
+# Add NEON implementation. On Cortex A8 it was measured to process one
+# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
+# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
+# code (meaning that latter performs sub-optimally, nothing was done
+# about it).
+
+# May 2014.
+#
+# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$ctx="x0"; $t0="w0"; $xt0="x0";
+$inp="x1"; $t4="w1"; $xt4="x1";
+$len="x2"; $t1="w2"; $xt1="x2";
+ $t3="w3";
+$A="w4";
+$B="w5";
+$C="w6";
+$D="w7";
+$E="w8";
+$F="w9";
+$G="w10";
+$H="w11";
+@V=($A,$B,$C,$D,$E,$F,$G,$H);
+$t2="w12";
+$xt2="x12";
+$Ktbl="x14";
+
+@Sigma0=( 2,13,22);
+@Sigma1=( 6,11,25);
+@sigma0=( 7,18, 3);
+@sigma1=(17,19,10);
+
+######################################################################
+# NEON stuff
+#
+{{{
+my @VB=map("v$_.16b",(0..3));
+my @VS=map("v$_.4s",(0..3));
+
+my ($TS0,$TS1,$TS2,$TS3,$TS4,$TS5,$TS6,$TS7)=("v4.4s","v5.4s","v6.4s","v7.4s","v8.4s","v9.4s","v10.4s","v11.4s");
+my ($TB0,$TB1,$TB2,$TB3,$TB4,$TB5,$TB6,$TB7)=("v4.16b","v5.16b","v6.16b","v7.16b","v8.16b","v9.16b","v10.16b","v11.16b");
+my ($TD5HI,$TD5LO,$TD7LO)=("v9.d[1]", "d9", "v11.d[0]");
+my $Xfer=$xt4;
+my $j=0;
+
+sub AUTOLOAD() # thunk [simplified] x86-style perlasm: any undefined &mnemonic(...) call lands here
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; # drop the package qualifier, turn the first '_' into '.'
+ my $arg = pop; # last operand is handled separately from the rest
+ $arg = "#$arg" if ($arg*1 eq $arg); # a purely numeric last operand becomes an immediate
+ $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; # append one instruction line to the output buffer
+}
+
+sub Xupdate()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+ &ext ($TB0,@VB[0],@VB[1],4); # X[1..4]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ext ($TB1,@VB[2],@VB[3],4); # X[9..12]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ushr ($TS2,$TS0,$sigma0[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &add (@VS[0],@VS[0],$TS1); # X[0..3] += X[9..12]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ushr ($TS1,$TS0,$sigma0[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &sli ($TS2,$TS0,32-$sigma0[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ushr ($TS3,$TS0,$sigma0[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &eor ($TB1,$TB1,$TB2);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &sli ($TS3,$TS0,32-$sigma0[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ushr ($TS4,@VS[3],$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &eor ($TB1,$TB1,$TB3); # sigma0(X[1..4])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &sli ($TS4,@VS[3],32-$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ushr ($TS5,@VS[3],$sigma1[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &add (@VS[0],@VS[0],$TS1); # X[0..3] += sigma0(X[1..4])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &eor ($TB5,$TB5,$TB4);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ushr ($TS4,@VS[3],$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &sli ($TS4,@VS[3],32-$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &eor ($TB5,$TB5,$TB4); # sigma1(X[14..15])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &mov ($TD5LO, $TD5HI);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &add (@VS[0],@VS[0],$TS5); # X[0..1] += sigma1(X[14..15])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ushr ($TS6,@VS[0],$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &sli ($TS6,@VS[0],32-$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ushr ($TS7,@VS[0],$sigma1[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &eor ($TB7,$TB7,$TB6);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ushr ($TS6,@VS[0],$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ld1 ("{$TS0}","[$Ktbl], #16");
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &sli ($TS6,@VS[0],32-$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &eor ($TB7,$TB7,$TB6); # sigma1(X[16..17])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &eor ($TB5,$TB5,$TB5);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &mov ($TD5HI, $TD7LO);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &add (@VS[0],@VS[0],$TS5); # X[0..3] += sigma1(X[14..17])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &add ($TS0,$TS0,@VS[0]);
+ while($#insns>=2) { eval(shift(@insns)); }
+ &st1 ("{$TS0}","[$Xfer], #16");
+ eval(shift(@insns));
+ eval(shift(@insns));
+
+ push(@VB,shift(@VB)); # "rotate" X[]
+ push(@VS,shift(@VS)); # "rotate" X[]
+}
+
+sub Xpreload() # emit four scalar rounds interleaved with preparing one vector of the next input block
+{ use integer;
+ my $body = shift; # code ref returning the list of per-round snippets
+ my @insns = (&$body,&$body,&$body,&$body); # snippets for four consecutive rounds
+ my ($a,$b,$c,$d,$e,$f,$g,$h); # assigned from @V inside the eval'ed snippets
+
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &ld1 ("{$TS0}","[$Ktbl], #16"); # next four words from the K256 table
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &rev32 (@VB[0],@VB[0]); # byte-swap every 32-bit word of the input vector
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &add ($TS0,$TS0,@VS[0]); # K[i] + X[i]
+ foreach (@insns) { eval; } # remaining instructions
+ &st1 ("{$TS0}","[$Xfer], #16"); # store the sums for the scalar rounds, advance the transfer pointer
+
+ push(@VB,shift(@VB)); # "rotate" X[]
+ push(@VS,shift(@VS)); # "rotate" X[]
+}
+
+sub body_00_15 () {
+ (
+ '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
+ '&add ($h,$h,$t1)', # h+=X[i]+K[i]
+ '&eor ($t1,$f,$g)',
+ '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
+ '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
+ '&and ($t1,$t1,$e)',
+ '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
+ '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
+ '&ror ($t2,$t2,"#$Sigma1[0]")',
+ '&eor ($t1,$t1,$g)', # Ch(e,f,g)
+ '&add ($h,$h,$t2)', # h+=Sigma1(e)
+ '&eor ($t2,$a,$b)', # a^b, b^c in next round
+ '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
+ '&add ($h,$h,$t1)', # h+=Ch(e,f,g)
+ '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
+ '&ldr ($t1,"[$Ktbl]") if ($j==15);'.
+ '&ldr ($xt1,"[sp,#64]") if ($j==31)',
+ '&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
+ '&ror ($t0,$t0,"#$Sigma0[0]")',
+ '&add ($d,$d,$h)', # d+=h
+ '&add ($h,$h,$t0);'. # h+=Sigma0(a)
+ '&eor ($t3,$t3,$b)', # Maj(a,b,c)
+ '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
+ )
+}
+
+$code.=<<___;
+
+.text
+.type K256,%object
+.align 5
+K256:
+.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size K256,.-K256
+.word 0 // terminator
+
+.global sha256_block_data_order_neon
+.type sha256_block_data_order_neon,%function
+.align 4
+sha256_block_data_order_neon:
+.LNEON:
+ stp x29, x30, [sp, #-16]! // save frame pointer and link register
+ mov x29, sp
+ sub sp,sp,#16*4+32 // 64 bytes of X[i]+K[i] scratch plus slots at #64, #72, #80
+ adr $Ktbl,K256 // $Ktbl = address of the K256 table
+ bic x15,x15,#15 // NOTE(review): dead code, x15 is not used anywhere else in this routine and sp is not derived from it
+ add $len,$inp,$len,lsl#6 // len to point at the end of inp
+
+ ld1 {@VB[0]},[$inp], #16
+ ld1 {@VB[1]},[$inp], #16
+ ld1 {@VB[2]},[$inp], #16
+ ld1 {@VB[3]},[$inp], #16
+ ld1 {$TS0},[$Ktbl], #16
+ ld1 {$TS1},[$Ktbl], #16
+ ld1 {$TS2},[$Ktbl], #16
+ ld1 {$TS3},[$Ktbl], #16
+ rev32 @VB[0],@VB[0] // yes, even on
+ str $ctx,[sp,#64]
+ rev32 @VB[1],@VB[1] // big-endian
+ str $inp,[sp,#72]
+ mov $Xfer,sp
+ rev32 @VB[2],@VB[2]
+ str $len,[sp,#80]
+ rev32 @VB[3],@VB[3]
+ add $TS0,$TS0,@VS[0]
+ add $TS1,$TS1,@VS[1]
+ st1 {$TS0},[$Xfer], #16
+ add $TS2,$TS2,@VS[2]
+ st1 {$TS1},[$Xfer], #16
+ add $TS3,$TS3,@VS[3]
+ st1 {$TS2-$TS3},[$Xfer], #32
+
+ ldp $A, $B, [$ctx]
+ ldp $C, $D, [$ctx, #8]
+ ldp $E, $F, [$ctx, #16]
+ ldp $G, $H, [$ctx, #24]
+ sub $Xfer,$Xfer,#64
+ ldr $t1,[sp,#0]
+ mov $xt2,xzr
+ eor $t3,$B,$C
+ b .L_00_48
+
+.align 4
+.L_00_48:
+___
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+$code.=<<___;
+ cmp $t1,#0 // check for K256 terminator
+ ldr $t1,[sp,#0]
+ sub $Xfer,$Xfer,#64
+ bne .L_00_48
+
+ ldr $inp,[sp,#72]
+ ldr $xt0,[sp,#80]
+ sub $Ktbl,$Ktbl,#256 // rewind $Ktbl
+ cmp $inp,$xt0
+ mov $xt0, #64
+ csel $xt0, $xt0, xzr, eq
+ sub $inp,$inp,$xt0 // avoid SEGV
+ ld1 {@VS[0]},[$inp], #16 // load next input block
+ ld1 {@VS[1]},[$inp], #16
+ ld1 {@VS[2]},[$inp], #16
+ ld1 {@VS[3]},[$inp], #16
+ str $inp,[sp,#72]
+ mov $Xfer,sp
+___
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+$code.=<<___;
+ ldr $t0,[$xt1,#0]
+ add $A,$A,$t2 // h+=Maj(a,b,c) from the past
+ ldr $t2,[$xt1,#4]
+ ldr $t3,[$xt1,#8]
+ ldr $t4,[$xt1,#12]
+ add $A,$A,$t0 // accumulate
+ ldr $t0,[$xt1,#16]
+ add $B,$B,$t2
+ ldr $t2,[$xt1,#20]
+ add $C,$C,$t3
+ ldr $t3,[$xt1,#24]
+ add $D,$D,$t4
+ ldr $t4,[$xt1,#28]
+ add $E,$E,$t0
+ str $A,[$xt1],#4
+ add $F,$F,$t2
+ str $B,[$xt1],#4
+ add $G,$G,$t3
+ str $C,[$xt1],#4
+ add $H,$H,$t4
+ str $D,[$xt1],#4
+
+ stp $E, $F, [$xt1]
+ stp $G, $H, [$xt1, #8]
+
+ b.eq 0f
+ mov $Xfer,sp
+ ldr $t1,[sp,#0]
+ eor $t2,$t2,$t2
+ eor $t3,$B,$C
+ b .L_00_48
+
+0: add sp,sp,#16*4+32
+ ldp x29, x30, [sp], #16
+ ret
+
+.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
+___
+}}}
+
+foreach (split($/,$code)) {
+
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ print $_,"\n";
+}
+
+close STDOUT; # enforce flush
+
diff --git a/arch/arm64/crypto/sha256-core.S_shipped b/arch/arm64/crypto/sha256-core.S_shipped
new file mode 100644
index 000000000000..1d9b55367ee0
--- /dev/null
+++ b/arch/arm64/crypto/sha256-core.S_shipped
@@ -0,0 +1,883 @@
+
+.text
+.type K256,%object
+.align 5
+K256:
+.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size K256,.-K256
+.word 0 // terminator
+
+.global sha256_block_data_order_neon
+.type sha256_block_data_order_neon,%function
+.align 4
+sha256_block_data_order_neon:
+.LNEON:
+ stp x29, x30, [sp, #-16]! // save frame pointer and link register
+ mov x29, sp
+ sub sp,sp,#16*4+32 // 64 bytes of X[i]+K[i] scratch plus slots at #64, #72, #80
+ adr x14,K256 // x14 = address of the K256 table
+ bic x15,x15,#15 // NOTE(review): dead code, x15 is not used anywhere else in this routine and sp is not derived from it
+ add x2,x1,x2,lsl#6 // len to point at the end of inp
+
+ ld1 {v0.16b},[x1], #16
+ ld1 {v1.16b},[x1], #16
+ ld1 {v2.16b},[x1], #16
+ ld1 {v3.16b},[x1], #16
+ ld1 {v4.4s},[x14], #16
+ ld1 {v5.4s},[x14], #16
+ ld1 {v6.4s},[x14], #16
+ ld1 {v7.4s},[x14], #16
+ rev32 v0.16b,v0.16b // yes, even on
+ str x0,[sp,#64]
+ rev32 v1.16b,v1.16b // big-endian
+ str x1,[sp,#72]
+ mov x1,sp
+ rev32 v2.16b,v2.16b
+ str x2,[sp,#80]
+ rev32 v3.16b,v3.16b
+ add v4.4s,v4.4s,v0.4s
+ add v5.4s,v5.4s,v1.4s
+ st1 {v4.4s},[x1], #16
+ add v6.4s,v6.4s,v2.4s
+ st1 {v5.4s},[x1], #16
+ add v7.4s,v7.4s,v3.4s
+ st1 {v6.4s-v7.4s},[x1], #32
+
+ ldp w4, w5, [x0]
+ ldp w6, w7, [x0, #8]
+ ldp w8, w9, [x0, #16]
+ ldp w10, w11, [x0, #24]
+ sub x1,x1,#64
+ ldr w2,[sp,#0]
+ mov x12,xzr
+ eor w3,w5,w6
+ b .L_00_48
+
+.align 4
+.L_00_48:
+ ext v4.16b,v0.16b,v1.16b,#4
+ add w11,w11,w2
+ eor w2,w9,w10
+ eor w0,w8,w8,ror#5
+ ext v5.16b,v2.16b,v3.16b,#4
+ add w4,w4,w12
+ and w2,w2,w8
+ eor w12,w0,w8,ror#19
+ ushr v6.4s,v4.4s,#7
+ eor w0,w4,w4,ror#11
+ ror w12,w12,#6
+ add v0.4s,v0.4s,v5.4s
+ eor w2,w2,w10
+ add w11,w11,w12
+ ushr v5.4s,v4.4s,#3
+ eor w12,w4,w5
+ eor w0,w0,w4,ror#20
+ sli v6.4s,v4.4s,#25
+ add w11,w11,w2
+ ldr w2,[sp,#4]
+ ushr v7.4s,v4.4s,#18
+ and w3,w3,w12
+ ror w0,w0,#2
+ eor v5.16b,v5.16b,v6.16b
+ add w7,w7,w11
+ add w11,w11,w0
+ eor w3,w3,w5
+ sli v7.4s,v4.4s,#14
+ add w10,w10,w2
+ ushr v8.4s,v3.4s,#17
+ eor w2,w8,w9
+ eor w0,w7,w7,ror#5
+ eor v5.16b,v5.16b,v7.16b
+ add w11,w11,w3
+ and w2,w2,w7
+ sli v8.4s,v3.4s,#15
+ eor w3,w0,w7,ror#19
+ eor w0,w11,w11,ror#11
+ ushr v9.4s,v3.4s,#10
+ ror w3,w3,#6
+ eor w2,w2,w9
+ add v0.4s,v0.4s,v5.4s
+ add w10,w10,w3
+ eor w3,w11,w4
+ eor v9.16b,v9.16b,v8.16b
+ eor w0,w0,w11,ror#20
+ add w10,w10,w2
+ ushr v8.4s,v3.4s,#19
+ ldr w2,[sp,#8]
+ and w12,w12,w3
+ sli v8.4s,v3.4s,#13
+ ror w0,w0,#2
+ add w6,w6,w10
+ eor v9.16b,v9.16b,v8.16b
+ add w10,w10,w0
+ eor w12,w12,w4
+ mov d9,v9.d[1]
+ add w9,w9,w2
+ eor w2,w7,w8
+ add v0.4s,v0.4s,v9.4s
+ eor w0,w6,w6,ror#5
+ add w10,w10,w12
+ ushr v10.4s,v0.4s,#17
+ and w2,w2,w6
+ eor w12,w0,w6,ror#19
+ sli v10.4s,v0.4s,#15
+ eor w0,w10,w10,ror#11
+ ror w12,w12,#6
+ ushr v11.4s,v0.4s,#10
+ eor w2,w2,w8
+ add w9,w9,w12
+ eor v11.16b,v11.16b,v10.16b
+ eor w12,w10,w11
+ eor w0,w0,w10,ror#20
+ ushr v10.4s,v0.4s,#19
+ add w9,w9,w2
+ ldr w2,[sp,#12]
+ ld1 {v4.4s},[x14], #16
+ and w3,w3,w12
+ ror w0,w0,#2
+ sli v10.4s,v0.4s,#13
+ add w5,w5,w9
+ add w9,w9,w0
+ eor w3,w3,w11
+ eor v11.16b,v11.16b,v10.16b
+ add w8,w8,w2
+ eor v9.16b,v9.16b,v9.16b
+ eor w2,w6,w7
+ eor w0,w5,w5,ror#5
+ mov v9.d[1],v11.d[0]
+ add w9,w9,w3
+ and w2,w2,w5
+ add v0.4s,v0.4s,v9.4s
+ eor w3,w0,w5,ror#19
+ eor w0,w9,w9,ror#11
+ add v4.4s,v4.4s,v0.4s
+ ror w3,w3,#6
+ eor w2,w2,w7
+ add w8,w8,w3
+ eor w3,w9,w10
+ eor w0,w0,w9,ror#20
+ add w8,w8,w2
+ ldr w2,[sp,#16]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w4,w4,w8
+ st1 {v4.4s},[x1], #16
+ add w8,w8,w0
+ eor w12,w12,w10
+ ext v4.16b,v1.16b,v2.16b,#4
+ add w7,w7,w2
+ eor w2,w5,w6
+ eor w0,w4,w4,ror#5
+ ext v5.16b,v3.16b,v0.16b,#4
+ add w8,w8,w12
+ and w2,w2,w4
+ eor w12,w0,w4,ror#19
+ ushr v6.4s,v4.4s,#7
+ eor w0,w8,w8,ror#11
+ ror w12,w12,#6
+ add v1.4s,v1.4s,v5.4s
+ eor w2,w2,w6
+ add w7,w7,w12
+ ushr v5.4s,v4.4s,#3
+ eor w12,w8,w9
+ eor w0,w0,w8,ror#20
+ sli v6.4s,v4.4s,#25
+ add w7,w7,w2
+ ldr w2,[sp,#20]
+ ushr v7.4s,v4.4s,#18
+ and w3,w3,w12
+ ror w0,w0,#2
+ eor v5.16b,v5.16b,v6.16b
+ add w11,w11,w7
+ add w7,w7,w0
+ eor w3,w3,w9
+ sli v7.4s,v4.4s,#14
+ add w6,w6,w2
+ ushr v8.4s,v0.4s,#17
+ eor w2,w4,w5
+ eor w0,w11,w11,ror#5
+ eor v5.16b,v5.16b,v7.16b
+ add w7,w7,w3
+ and w2,w2,w11
+ sli v8.4s,v0.4s,#15
+ eor w3,w0,w11,ror#19
+ eor w0,w7,w7,ror#11
+ ushr v9.4s,v0.4s,#10
+ ror w3,w3,#6
+ eor w2,w2,w5
+ add v1.4s,v1.4s,v5.4s
+ add w6,w6,w3
+ eor w3,w7,w8
+ eor v9.16b,v9.16b,v8.16b
+ eor w0,w0,w7,ror#20
+ add w6,w6,w2
+ ushr v8.4s,v0.4s,#19
+ ldr w2,[sp,#24]
+ and w12,w12,w3
+ sli v8.4s,v0.4s,#13
+ ror w0,w0,#2
+ add w10,w10,w6
+ eor v9.16b,v9.16b,v8.16b
+ add w6,w6,w0
+ eor w12,w12,w8
+ mov d9,v9.d[1]
+ add w5,w5,w2
+ eor w2,w11,w4
+ add v1.4s,v1.4s,v9.4s
+ eor w0,w10,w10,ror#5
+ add w6,w6,w12
+ ushr v10.4s,v1.4s,#17
+ and w2,w2,w10
+ eor w12,w0,w10,ror#19
+ sli v10.4s,v1.4s,#15
+ eor w0,w6,w6,ror#11
+ ror w12,w12,#6
+ ushr v11.4s,v1.4s,#10
+ eor w2,w2,w4
+ add w5,w5,w12
+ eor v11.16b,v11.16b,v10.16b
+ eor w12,w6,w7
+ eor w0,w0,w6,ror#20
+ ushr v10.4s,v1.4s,#19
+ add w5,w5,w2
+ ldr w2,[sp,#28]
+ ld1 {v4.4s},[x14], #16
+ and w3,w3,w12
+ ror w0,w0,#2
+ sli v10.4s,v1.4s,#13
+ add w9,w9,w5
+ add w5,w5,w0
+ eor w3,w3,w7
+ eor v11.16b,v11.16b,v10.16b
+ add w4,w4,w2
+ eor v9.16b,v9.16b,v9.16b
+ eor w2,w10,w11
+ eor w0,w9,w9,ror#5
+ mov v9.d[1],v11.d[0]
+ add w5,w5,w3
+ and w2,w2,w9
+ add v1.4s,v1.4s,v9.4s
+ eor w3,w0,w9,ror#19
+ eor w0,w5,w5,ror#11
+ add v4.4s,v4.4s,v1.4s
+ ror w3,w3,#6
+ eor w2,w2,w11
+ add w4,w4,w3
+ eor w3,w5,w6
+ eor w0,w0,w5,ror#20
+ add w4,w4,w2
+ ldr w2,[sp,#32]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w8,w8,w4
+ st1 {v4.4s},[x1], #16
+ add w4,w4,w0
+ eor w12,w12,w6
+ ext v4.16b,v2.16b,v3.16b,#4
+ add w11,w11,w2
+ eor w2,w9,w10
+ eor w0,w8,w8,ror#5
+ ext v5.16b,v0.16b,v1.16b,#4
+ add w4,w4,w12
+ and w2,w2,w8
+ eor w12,w0,w8,ror#19
+ ushr v6.4s,v4.4s,#7
+ eor w0,w4,w4,ror#11
+ ror w12,w12,#6
+ add v2.4s,v2.4s,v5.4s
+ eor w2,w2,w10
+ add w11,w11,w12
+ ushr v5.4s,v4.4s,#3
+ eor w12,w4,w5
+ eor w0,w0,w4,ror#20
+ sli v6.4s,v4.4s,#25
+ add w11,w11,w2
+ ldr w2,[sp,#36]
+ ushr v7.4s,v4.4s,#18
+ and w3,w3,w12
+ ror w0,w0,#2
+ eor v5.16b,v5.16b,v6.16b
+ add w7,w7,w11
+ add w11,w11,w0
+ eor w3,w3,w5
+ sli v7.4s,v4.4s,#14
+ add w10,w10,w2
+ ushr v8.4s,v1.4s,#17
+ eor w2,w8,w9
+ eor w0,w7,w7,ror#5
+ eor v5.16b,v5.16b,v7.16b
+ add w11,w11,w3
+ and w2,w2,w7
+ sli v8.4s,v1.4s,#15
+ eor w3,w0,w7,ror#19
+ eor w0,w11,w11,ror#11
+ ushr v9.4s,v1.4s,#10
+ ror w3,w3,#6
+ eor w2,w2,w9
+ add v2.4s,v2.4s,v5.4s
+ add w10,w10,w3
+ eor w3,w11,w4
+ eor v9.16b,v9.16b,v8.16b
+ eor w0,w0,w11,ror#20
+ add w10,w10,w2
+ ushr v8.4s,v1.4s,#19
+ ldr w2,[sp,#40]
+ and w12,w12,w3
+ sli v8.4s,v1.4s,#13
+ ror w0,w0,#2
+ add w6,w6,w10
+ eor v9.16b,v9.16b,v8.16b
+ add w10,w10,w0
+ eor w12,w12,w4
+ mov d9,v9.d[1]
+ add w9,w9,w2
+ eor w2,w7,w8
+ add v2.4s,v2.4s,v9.4s
+ eor w0,w6,w6,ror#5
+ add w10,w10,w12
+ ushr v10.4s,v2.4s,#17
+ and w2,w2,w6
+ eor w12,w0,w6,ror#19
+ sli v10.4s,v2.4s,#15
+ eor w0,w10,w10,ror#11
+ ror w12,w12,#6
+ ushr v11.4s,v2.4s,#10
+ eor w2,w2,w8
+ add w9,w9,w12
+ eor v11.16b,v11.16b,v10.16b
+ eor w12,w10,w11
+ eor w0,w0,w10,ror#20
+ ushr v10.4s,v2.4s,#19
+ add w9,w9,w2
+ ldr w2,[sp,#44]
+ ld1 {v4.4s},[x14], #16
+ and w3,w3,w12
+ ror w0,w0,#2
+ sli v10.4s,v2.4s,#13
+ add w5,w5,w9
+ add w9,w9,w0
+ eor w3,w3,w11
+ eor v11.16b,v11.16b,v10.16b
+ add w8,w8,w2
+ eor v9.16b,v9.16b,v9.16b
+ eor w2,w6,w7
+ eor w0,w5,w5,ror#5
+ mov v9.d[1],v11.d[0]
+ add w9,w9,w3
+ and w2,w2,w5
+ add v2.4s,v2.4s,v9.4s
+ eor w3,w0,w5,ror#19
+ eor w0,w9,w9,ror#11
+ add v4.4s,v4.4s,v2.4s
+ ror w3,w3,#6
+ eor w2,w2,w7
+ add w8,w8,w3
+ eor w3,w9,w10
+ eor w0,w0,w9,ror#20
+ add w8,w8,w2
+ ldr w2,[sp,#48]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w4,w4,w8
+ st1 {v4.4s},[x1], #16
+ add w8,w8,w0
+ eor w12,w12,w10
+ ext v4.16b,v3.16b,v0.16b,#4
+ add w7,w7,w2
+ eor w2,w5,w6
+ eor w0,w4,w4,ror#5
+ ext v5.16b,v1.16b,v2.16b,#4
+ add w8,w8,w12
+ and w2,w2,w4
+ eor w12,w0,w4,ror#19
+ ushr v6.4s,v4.4s,#7
+ eor w0,w8,w8,ror#11
+ ror w12,w12,#6
+ add v3.4s,v3.4s,v5.4s
+ eor w2,w2,w6
+ add w7,w7,w12
+ ushr v5.4s,v4.4s,#3
+ eor w12,w8,w9
+ eor w0,w0,w8,ror#20
+ sli v6.4s,v4.4s,#25
+ add w7,w7,w2
+ ldr w2,[sp,#52]
+ ushr v7.4s,v4.4s,#18
+ and w3,w3,w12
+ ror w0,w0,#2
+ eor v5.16b,v5.16b,v6.16b
+ add w11,w11,w7
+ add w7,w7,w0
+ eor w3,w3,w9
+ sli v7.4s,v4.4s,#14
+ add w6,w6,w2
+ ushr v8.4s,v2.4s,#17
+ eor w2,w4,w5
+ eor w0,w11,w11,ror#5
+ eor v5.16b,v5.16b,v7.16b
+ add w7,w7,w3
+ and w2,w2,w11
+ sli v8.4s,v2.4s,#15
+ eor w3,w0,w11,ror#19
+ eor w0,w7,w7,ror#11
+ ushr v9.4s,v2.4s,#10
+ ror w3,w3,#6
+ eor w2,w2,w5
+ add v3.4s,v3.4s,v5.4s
+ add w6,w6,w3
+ eor w3,w7,w8
+ eor v9.16b,v9.16b,v8.16b
+ eor w0,w0,w7,ror#20
+ add w6,w6,w2
+ ushr v8.4s,v2.4s,#19
+ ldr w2,[sp,#56]
+ and w12,w12,w3
+ sli v8.4s,v2.4s,#13
+ ror w0,w0,#2
+ add w10,w10,w6
+ eor v9.16b,v9.16b,v8.16b
+ add w6,w6,w0
+ eor w12,w12,w8
+ mov d9,v9.d[1]
+ add w5,w5,w2
+ eor w2,w11,w4
+ add v3.4s,v3.4s,v9.4s
+ eor w0,w10,w10,ror#5
+ add w6,w6,w12
+ ushr v10.4s,v3.4s,#17
+ and w2,w2,w10
+ eor w12,w0,w10,ror#19
+ sli v10.4s,v3.4s,#15
+ eor w0,w6,w6,ror#11
+ ror w12,w12,#6
+ ushr v11.4s,v3.4s,#10
+ eor w2,w2,w4
+ add w5,w5,w12
+ eor v11.16b,v11.16b,v10.16b
+ eor w12,w6,w7
+ eor w0,w0,w6,ror#20
+ ushr v10.4s,v3.4s,#19
+ add w5,w5,w2
+ ldr w2,[sp,#60]
+ ld1 {v4.4s},[x14], #16
+ and w3,w3,w12
+ ror w0,w0,#2
+ sli v10.4s,v3.4s,#13
+ add w9,w9,w5
+ add w5,w5,w0
+ eor w3,w3,w7
+ eor v11.16b,v11.16b,v10.16b
+ add w4,w4,w2
+ eor v9.16b,v9.16b,v9.16b
+ eor w2,w10,w11
+ eor w0,w9,w9,ror#5
+ mov v9.d[1],v11.d[0]
+ add w5,w5,w3
+ and w2,w2,w9
+ add v3.4s,v3.4s,v9.4s
+ eor w3,w0,w9,ror#19
+ eor w0,w5,w5,ror#11
+ add v4.4s,v4.4s,v3.4s
+ ror w3,w3,#6
+ eor w2,w2,w11
+ add w4,w4,w3
+ eor w3,w5,w6
+ eor w0,w0,w5,ror#20
+ add w4,w4,w2
+ ldr w2,[x14]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w8,w8,w4
+ st1 {v4.4s},[x1], #16
+ add w4,w4,w0
+ eor w12,w12,w6
+ cmp w2,#0 // check for K256 terminator
+ ldr w2,[sp,#0]
+ sub x1,x1,#64
+ bne .L_00_48
+
+ ldr x1,[sp,#72]
+ ldr x0,[sp,#80]
+ sub x14,x14,#256 // rewind x14
+ cmp x1,x0
+ mov x0, #64
+ csel x0, x0, xzr, eq
+ sub x1,x1,x0 // avoid SEGV
+ ld1 {v0.4s},[x1], #16 // load next input block
+ ld1 {v1.4s},[x1], #16
+ ld1 {v2.4s},[x1], #16
+ ld1 {v3.4s},[x1], #16
+ str x1,[sp,#72]
+ mov x1,sp
+ add w11,w11,w2
+ eor w2,w9,w10
+ eor w0,w8,w8,ror#5
+ add w4,w4,w12
+ ld1 {v4.4s},[x14], #16
+ and w2,w2,w8
+ eor w12,w0,w8,ror#19
+ eor w0,w4,w4,ror#11
+ ror w12,w12,#6
+ rev32 v0.16b,v0.16b
+ eor w2,w2,w10
+ add w11,w11,w12
+ eor w12,w4,w5
+ eor w0,w0,w4,ror#20
+ add v4.4s,v4.4s,v0.4s
+ add w11,w11,w2
+ ldr w2,[sp,#4]
+ and w3,w3,w12
+ ror w0,w0,#2
+ add w7,w7,w11
+ add w11,w11,w0
+ eor w3,w3,w5
+ add w10,w10,w2
+ eor w2,w8,w9
+ eor w0,w7,w7,ror#5
+ add w11,w11,w3
+ and w2,w2,w7
+ eor w3,w0,w7,ror#19
+ eor w0,w11,w11,ror#11
+ ror w3,w3,#6
+ eor w2,w2,w9
+ add w10,w10,w3
+ eor w3,w11,w4
+ eor w0,w0,w11,ror#20
+ add w10,w10,w2
+ ldr w2,[sp,#8]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w6,w6,w10
+ add w10,w10,w0
+ eor w12,w12,w4
+ add w9,w9,w2
+ eor w2,w7,w8
+ eor w0,w6,w6,ror#5
+ add w10,w10,w12
+ and w2,w2,w6
+ eor w12,w0,w6,ror#19
+ eor w0,w10,w10,ror#11
+ ror w12,w12,#6
+ eor w2,w2,w8
+ add w9,w9,w12
+ eor w12,w10,w11
+ eor w0,w0,w10,ror#20
+ add w9,w9,w2
+ ldr w2,[sp,#12]
+ and w3,w3,w12
+ ror w0,w0,#2
+ add w5,w5,w9
+ add w9,w9,w0
+ eor w3,w3,w11
+ add w8,w8,w2
+ eor w2,w6,w7
+ eor w0,w5,w5,ror#5
+ add w9,w9,w3
+ and w2,w2,w5
+ eor w3,w0,w5,ror#19
+ eor w0,w9,w9,ror#11
+ ror w3,w3,#6
+ eor w2,w2,w7
+ add w8,w8,w3
+ eor w3,w9,w10
+ eor w0,w0,w9,ror#20
+ add w8,w8,w2
+ ldr w2,[sp,#16]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w4,w4,w8
+ add w8,w8,w0
+ eor w12,w12,w10
+ st1 {v4.4s},[x1], #16
+ add w7,w7,w2
+ eor w2,w5,w6
+ eor w0,w4,w4,ror#5
+ add w8,w8,w12
+ ld1 {v4.4s},[x14], #16
+ and w2,w2,w4
+ eor w12,w0,w4,ror#19
+ eor w0,w8,w8,ror#11
+ ror w12,w12,#6
+ rev32 v1.16b,v1.16b
+ eor w2,w2,w6
+ add w7,w7,w12
+ eor w12,w8,w9
+ eor w0,w0,w8,ror#20
+ add v4.4s,v4.4s,v1.4s
+ add w7,w7,w2
+ ldr w2,[sp,#20]
+ and w3,w3,w12
+ ror w0,w0,#2
+ add w11,w11,w7
+ add w7,w7,w0
+ eor w3,w3,w9
+ add w6,w6,w2
+ eor w2,w4,w5
+ eor w0,w11,w11,ror#5
+ add w7,w7,w3
+ and w2,w2,w11
+ eor w3,w0,w11,ror#19
+ eor w0,w7,w7,ror#11
+ ror w3,w3,#6
+ eor w2,w2,w5
+ add w6,w6,w3
+ eor w3,w7,w8
+ eor w0,w0,w7,ror#20
+ add w6,w6,w2
+ ldr w2,[sp,#24]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w10,w10,w6
+ add w6,w6,w0
+ eor w12,w12,w8
+ add w5,w5,w2
+ eor w2,w11,w4
+ eor w0,w10,w10,ror#5
+ add w6,w6,w12
+ and w2,w2,w10
+ eor w12,w0,w10,ror#19
+ eor w0,w6,w6,ror#11
+ ror w12,w12,#6
+ eor w2,w2,w4
+ add w5,w5,w12
+ eor w12,w6,w7
+ eor w0,w0,w6,ror#20
+ add w5,w5,w2
+ ldr w2,[sp,#28]
+ and w3,w3,w12
+ ror w0,w0,#2
+ add w9,w9,w5
+ add w5,w5,w0
+ eor w3,w3,w7
+ add w4,w4,w2
+ eor w2,w10,w11
+ eor w0,w9,w9,ror#5
+ add w5,w5,w3
+ and w2,w2,w9
+ eor w3,w0,w9,ror#19
+ eor w0,w5,w5,ror#11
+ ror w3,w3,#6
+ eor w2,w2,w11
+ add w4,w4,w3
+ eor w3,w5,w6
+ eor w0,w0,w5,ror#20
+ add w4,w4,w2
+ ldr w2,[sp,#32]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w8,w8,w4
+ add w4,w4,w0
+ eor w12,w12,w6
+ st1 {v4.4s},[x1], #16
+ add w11,w11,w2
+ eor w2,w9,w10
+ eor w0,w8,w8,ror#5
+ add w4,w4,w12
+ ld1 {v4.4s},[x14], #16
+ and w2,w2,w8
+ eor w12,w0,w8,ror#19
+ eor w0,w4,w4,ror#11
+ ror w12,w12,#6
+ rev32 v2.16b,v2.16b
+ eor w2,w2,w10
+ add w11,w11,w12
+ eor w12,w4,w5
+ eor w0,w0,w4,ror#20
+ add v4.4s,v4.4s,v2.4s
+ add w11,w11,w2
+ ldr w2,[sp,#36]
+ and w3,w3,w12
+ ror w0,w0,#2
+ add w7,w7,w11
+ add w11,w11,w0
+ eor w3,w3,w5
+ add w10,w10,w2
+ eor w2,w8,w9
+ eor w0,w7,w7,ror#5
+ add w11,w11,w3
+ and w2,w2,w7
+ eor w3,w0,w7,ror#19
+ eor w0,w11,w11,ror#11
+ ror w3,w3,#6
+ eor w2,w2,w9
+ add w10,w10,w3
+ eor w3,w11,w4
+ eor w0,w0,w11,ror#20
+ add w10,w10,w2
+ ldr w2,[sp,#40]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w6,w6,w10
+ add w10,w10,w0
+ eor w12,w12,w4
+ add w9,w9,w2
+ eor w2,w7,w8
+ eor w0,w6,w6,ror#5
+ add w10,w10,w12
+ and w2,w2,w6
+ eor w12,w0,w6,ror#19
+ eor w0,w10,w10,ror#11
+ ror w12,w12,#6
+ eor w2,w2,w8
+ add w9,w9,w12
+ eor w12,w10,w11
+ eor w0,w0,w10,ror#20
+ add w9,w9,w2
+ ldr w2,[sp,#44]
+ and w3,w3,w12
+ ror w0,w0,#2
+ add w5,w5,w9
+ add w9,w9,w0
+ eor w3,w3,w11
+ add w8,w8,w2
+ eor w2,w6,w7
+ eor w0,w5,w5,ror#5
+ add w9,w9,w3
+ and w2,w2,w5
+ eor w3,w0,w5,ror#19
+ eor w0,w9,w9,ror#11
+ ror w3,w3,#6
+ eor w2,w2,w7
+ add w8,w8,w3
+ eor w3,w9,w10
+ eor w0,w0,w9,ror#20
+ add w8,w8,w2
+ ldr w2,[sp,#48]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w4,w4,w8
+ add w8,w8,w0
+ eor w12,w12,w10
+ st1 {v4.4s},[x1], #16
+ add w7,w7,w2
+ eor w2,w5,w6
+ eor w0,w4,w4,ror#5
+ add w8,w8,w12
+ ld1 {v4.4s},[x14], #16
+ and w2,w2,w4
+ eor w12,w0,w4,ror#19
+ eor w0,w8,w8,ror#11
+ ror w12,w12,#6
+ rev32 v3.16b,v3.16b
+ eor w2,w2,w6
+ add w7,w7,w12
+ eor w12,w8,w9
+ eor w0,w0,w8,ror#20
+ add v4.4s,v4.4s,v3.4s
+ add w7,w7,w2
+ ldr w2,[sp,#52]
+ and w3,w3,w12
+ ror w0,w0,#2
+ add w11,w11,w7
+ add w7,w7,w0
+ eor w3,w3,w9
+ add w6,w6,w2
+ eor w2,w4,w5
+ eor w0,w11,w11,ror#5
+ add w7,w7,w3
+ and w2,w2,w11
+ eor w3,w0,w11,ror#19
+ eor w0,w7,w7,ror#11
+ ror w3,w3,#6
+ eor w2,w2,w5
+ add w6,w6,w3
+ eor w3,w7,w8
+ eor w0,w0,w7,ror#20
+ add w6,w6,w2
+ ldr w2,[sp,#56]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w10,w10,w6
+ add w6,w6,w0
+ eor w12,w12,w8
+ add w5,w5,w2
+ eor w2,w11,w4
+ eor w0,w10,w10,ror#5
+ add w6,w6,w12
+ and w2,w2,w10
+ eor w12,w0,w10,ror#19
+ eor w0,w6,w6,ror#11
+ ror w12,w12,#6
+ eor w2,w2,w4
+ add w5,w5,w12
+ eor w12,w6,w7
+ eor w0,w0,w6,ror#20
+ add w5,w5,w2
+ ldr w2,[sp,#60]
+ and w3,w3,w12
+ ror w0,w0,#2
+ add w9,w9,w5
+ add w5,w5,w0
+ eor w3,w3,w7
+ add w4,w4,w2
+ eor w2,w10,w11
+ eor w0,w9,w9,ror#5
+ add w5,w5,w3
+ and w2,w2,w9
+ eor w3,w0,w9,ror#19
+ eor w0,w5,w5,ror#11
+ ror w3,w3,#6
+ eor w2,w2,w11
+ add w4,w4,w3
+ eor w3,w5,w6
+ eor w0,w0,w5,ror#20
+ add w4,w4,w2
+ ldr x2,[sp,#64]
+ and w12,w12,w3
+ ror w0,w0,#2
+ add w8,w8,w4
+ add w4,w4,w0
+ eor w12,w12,w6
+ st1 {v4.4s},[x1], #16
+ ldr w0,[x2,#0]
+ add w4,w4,w12 // h+=Maj(a,b,c) from the past
+ ldr w12,[x2,#4]
+ ldr w3,[x2,#8]
+ ldr w1,[x2,#12]
+ add w4,w4,w0 // accumulate
+ ldr w0,[x2,#16]
+ add w5,w5,w12
+ ldr w12,[x2,#20]
+ add w6,w6,w3
+ ldr w3,[x2,#24]
+ add w7,w7,w1
+ ldr w1,[x2,#28]
+ add w8,w8,w0
+ str w4,[x2],#4
+ add w9,w9,w12
+ str w5,[x2],#4
+ add w10,w10,w3
+ str w6,[x2],#4
+ add w11,w11,w1
+ str w7,[x2],#4
+
+ stp w8, w9, [x2]
+ stp w10, w11, [x2, #8]
+
+ b.eq 0f
+ mov x1,sp
+ ldr w2,[sp,#0]
+ eor w12,w12,w12
+ eor w3,w5,w6
+ b .L_00_48
+
+0: add sp,sp,#16*4+32
+ ldp x29, x30, [sp], #16
+ ret
+
+.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
diff --git a/arch/arm64/crypto/sha256_neon_glue.c b/arch/arm64/crypto/sha256_neon_glue.c
new file mode 100644
index 000000000000..149a4bb869ea
--- /dev/null
+++ b/arch/arm64/crypto/sha256_neon_glue.c
@@ -0,0 +1,103 @@
+/*
+ * AArch64 port of the OpenSSL SHA256 implementation for ARM NEON
+ *
+ * Copyright (c) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
+#include <asm/neon.h>
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 NEON");
+MODULE_AUTHOR("Andy Polyakov <appro@openssl.org>");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
+ unsigned int num_blks);
+
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ if ((sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+ return crypto_sha256_update(desc, data, len);
+
+ kernel_neon_begin_partial(12);
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha256_block_data_order_neon);
+ kernel_neon_end();
+
+ return 0;
+}
+
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ kernel_neon_begin_partial(12);
+ if (len)
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha256_block_data_order_neon);
+ sha256_base_do_finalize(desc,
+ (sha256_block_fn *)sha256_block_data_order_neon);
+ kernel_neon_end();
+
+ return sha256_base_finish(desc, out);
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ return sha256_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg algs[] = { {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = sha256_base_init,
+ .update = sha256_update,
+ .final = sha256_final,
+ .finup = sha256_finup,
+ .descsize = sizeof(struct sha256_state),
+ .base.cra_name = "sha256",
+ .base.cra_driver_name = "sha256-neon",
+ .base.cra_priority = 150,
+ .base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .base.cra_blocksize = SHA256_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+}, {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = sha224_base_init,
+ .update = sha256_update,
+ .final = sha256_final,
+ .finup = sha256_finup,
+ .descsize = sizeof(struct sha256_state),
+ .base.cra_name = "sha224",
+ .base.cra_driver_name = "sha224-neon",
+ .base.cra_priority = 150,
+ .base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .base.cra_blocksize = SHA224_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+} };
+
+static int __init sha256_neon_mod_init(void)
+{
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha256_neon_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha256_neon_mod_init);
+module_exit(sha256_neon_mod_fini);
--
2.7.4
^ permalink raw reply related
* Re: [PATCH] crypto: arm64/sha256 - add support for SHA256 using NEON instructions
From: Ard Biesheuvel @ 2016-09-29 23:37 UTC (permalink / raw)
To: linux-arm-kernel@lists.infradead.org,
linux-crypto@vger.kernel.org, Herbert Xu
Cc: Andy Polyakov, Victor Chong, Daniel Thompson, Will Deacon,
Catalin Marinas, Ard Biesheuvel
In-Reply-To: <1475189503-9175-2-git-send-email-ard.biesheuvel@linaro.org>
On 29 September 2016 at 15:51, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> This is a port to arm64 of the NEON implementation of SHA256 that lives
> under arch/arm/crypto.
>
> Due to the fact that the AArch64 assembler dialect deviates from the
> 32-bit ARM one in ways that makes sharing code problematic, and given
> that this version only uses the NEON version whereas the original
> implementation supports plain ALU assembler, NEON and Crypto Extensions,
> this code is built from a version sha256-armv4.pl that has been
> transliterated to the AArch64 NEON dialect.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> arch/arm64/crypto/Kconfig | 5 +
> arch/arm64/crypto/Makefile | 11 +
> arch/arm64/crypto/sha256-armv4.pl | 413 +++++++++
> arch/arm64/crypto/sha256-core.S_shipped | 883 ++++++++++++++++++++
> arch/arm64/crypto/sha256_neon_glue.c | 103 +++
> 5 files changed, 1415 insertions(+)
>
> diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
> index 2cf32e9887e1..d32371198474 100644
> --- a/arch/arm64/crypto/Kconfig
> +++ b/arch/arm64/crypto/Kconfig
> @@ -18,6 +18,11 @@ config CRYPTO_SHA2_ARM64_CE
> depends on ARM64 && KERNEL_MODE_NEON
> select CRYPTO_HASH
>
> +config CRYPTO_SHA2_ARM64_NEON
> + tristate "SHA-224/SHA-256 digest algorithm (ARMv8 NEON)"
> + depends on ARM64 && KERNEL_MODE_NEON
> + select CRYPTO_HASH
> +
> config CRYPTO_GHASH_ARM64_CE
> tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
> depends on ARM64 && KERNEL_MODE_NEON
> diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
> index abb79b3cfcfe..5156ebee0488 100644
> --- a/arch/arm64/crypto/Makefile
> +++ b/arch/arm64/crypto/Makefile
> @@ -29,6 +29,9 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o
> obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
> aes-neon-blk-y := aes-glue-neon.o aes-neon.o
>
> +obj-$(CONFIG_CRYPTO_SHA2_ARM64_NEON) := sha256-neon.o
There is a typo here that I only spotted just now: this should be += not :=
Herbert, if you're picking this up, could you please fix this at merge
time? Or do you need me to resend?
Thanks,
Ard.
^ permalink raw reply
* Char.c
From: Fontaine david @ 2016-09-30 8:09 UTC (permalink / raw)
To: linux-crypto
Hi Linus:
This push fixes a weakness in random number generation of file random.c.
The two polynomials of LFSR used in Linux/drivers/char/random.c are
P1(X) = x^128 + x^104 + x^76 + x^51 +x^25 + x + 1, for input pool
P2(X) = x^32 + x^26 + x^19 + x^14 + x^7 + x + 1 , for output pool
The polynomials Q1(X) = alpha^3*(P1(X)-1)+1 and Q2(X) = alpha^3*(P2(X)-1)+1
are not primitive over GF(2^32), where alpha is a
primitive element of GF(2^32).
It turns out that the periods of the LFSRs corresponding to these polynomials
are not optimal; that is, the space of numbers generated by these
LFSRs does not reach the maximal size 2^(32*deg(Pi))-1, i=1,2.
As mentioned in the random.c file, these polynomials come from an
article http://eprint.iacr.org/2012/251.pdf. It is stated in the
article that these polynomials have periods (2^(32*deg(Pi))-1)/3,
i=1,2, so not optimal.
We can improve these LFSRs by choosing primitive polynomials instead, and
thereby increase the space of generated numbers by a factor of 3.
Note that neither the polynomials used in the current implementation of the
PRNG nor the point presented here amounts to a practical attack on the PRNG.
After several calculations, we propose here the following polynomials:
R1(x) = x^128 + x^106 +x^79 + x^51 +x^25 + x+ 1, as new polynomial of input pool
R2(x) = x^32 + x^27 + x^21 + x^14 + x^7 + x +1, as new polynomial of
the output pool
The polynomials S1(X) = alpha^4*(R1(X)-1)+1 and S2(X) =
alpha^4*(R2(X)-1)+1 are then primitive over GF(2^32).
It is very easy to check their primitivity with Magma. Use the online
tool (http://magma.maths.usyd.edu.au/calc/) and the following code:
K0:=GF(2);
P<X> := PolynomialRing(K0);
K1<a> := ext<K0|X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+1>;K1;
P<t> := PolynomialRing(K1);
R1 := t^128 + t^106 + t^79 + t^51 + t^25 + t + 1;
S1 := a^4*(R1-1)+1;
R2 := t^32 + t^27 + t^21 + t^14 + t^7 + t + 1;
S2 := a^4*(R2-1)+1;
S1test := Evaluate((1/(t^128))*S1,1/t);S1test;
> t^128 + a*t^127 + a*t^100 + a*t^74 + a*t^53 + a*t^25 + a
S1test := t^128 + a^4*t^127 + a^4*t^103 + a^4*t^77 + a^4*t^49 + a^4*t^22 + a^4;
IsPrimitive(S1test);
> true
S2test := Evaluate((1/(t^32))*S2,1/t);S2test;
> t^32 + a*t^31 + a*t^24 + a*t^16 + a*t^12 + a*t^6 + a
S2test := t^32 + a^4*t^31 + a^4*t^25 + a^4*t^18 + a^4*t^11 + a^4*t^5 + a^4;
IsPrimitive(S2test);
> true
To use these polynomials, the following changes in the random.c file
should be applied:
olivier@Zebulon:~/Documents/linux-4.7.4/drivers/char$ diff random.c random-new.c
371,372c371,373
< /* x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */
< { S(128), 104, 76, 51, 25, 1 },
---
> /* was: x^128 + x^104 + x^76 + x^51 +x^25 + x + 1 */
> /* x^128 + x^106 + x^79 + x^51 +x^25 + x + 1 */
> { S(128), 106, 79, 51, 25, 1 },
374,375c375,377
< /* x^32 + x^26 + x^19 + x^14 + x^7 + x + 1 */
< { S(32), 26, 19, 14, 7, 1 },
---
> /* was: x^32 + x^26 + x^19 + x^14 + x^7 + x + 1 */
> /* x^32 + x^27 + x^21 + x^14 + x^7 + x + 1 */
> { S(32), 27, 21, 14, 7, 1 },
478a481
> /* was:
481a485,490
> */
> static __u32 const twist_table[16] = {
> 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac,
> 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
> 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c,
> 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c };
525c534,536
< r->pool[i] = (w >> 3) ^ twist_table[w & 7];
---
> /*
> was: r->pool[i] = (w >> 3) ^ twist_table[w & 7];*/
> r->pool[i] = (w >> 4) ^ twist_table[w & 15];
Regards,
David Fontaine & Olivier Vivolo
^ permalink raw reply
* [PATCH v2] crypto: caam - treat SGT address pointer as u64
From: Tudor Ambarus @ 2016-09-30 9:09 UTC (permalink / raw)
To: fabio.estevam, horia.geanta, herbert; +Cc: linux-crypto, Tudor Ambarus
Even for i.MX, CAAM is able to use address pointers greater than
32 bits, the address pointer field being interpreted as a double word.
Enforce u64 address pointer in the sec4_sg_entry struct.
This patch fixes the SGT address pointer endianness issue for
32bit platforms where core endianness != caam endianness.
Signed-off-by: Tudor Ambarus <tudor-dan.ambarus@nxp.com>
---
v2: Removed mx7d restriction.
drivers/crypto/caam/desc.h | 6 ------
drivers/crypto/caam/regs.h | 8 ++++++++
drivers/crypto/caam/sg_sw_sec4.h | 2 +-
3 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index 26427c1..513b664 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -23,13 +23,7 @@
#define SEC4_SG_OFFSET_MASK 0x00001fff
struct sec4_sg_entry {
-#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \
- defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX)
- u32 rsvd1;
- dma_addr_t ptr;
-#else
u64 ptr;
-#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */
u32 len;
u32 bpid_offset;
};
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index b3c5016..84d2f83 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -196,6 +196,14 @@ static inline u64 rd_reg64(void __iomem *reg)
#define caam_dma_to_cpu(value) caam32_to_cpu(value)
#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
+#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX
+#define cpu_to_caam_dma64(value) \
+ (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \
+ (u64)cpu_to_caam32(upper_32_bits(value)))
+#else
+#define cpu_to_caam_dma64(value) cpu_to_caam64(value)
+#endif
+
/*
* jr_outentry
* Represents each entry in a JobR output ring
diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h
index 19dc64f..41cd5a3 100644
--- a/drivers/crypto/caam/sg_sw_sec4.h
+++ b/drivers/crypto/caam/sg_sw_sec4.h
@@ -15,7 +15,7 @@ struct sec4_sg_entry;
static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr,
dma_addr_t dma, u32 len, u16 offset)
{
- sec4_sg_ptr->ptr = cpu_to_caam_dma(dma);
+ sec4_sg_ptr->ptr = cpu_to_caam_dma64(dma);
sec4_sg_ptr->len = cpu_to_caam32(len);
sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK);
#ifdef DEBUG
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH] arm64: add support for SHA256 using NEON instructions
From: Andy Polyakov @ 2016-09-30 10:44 UTC (permalink / raw)
To: Ard Biesheuvel, linux-arm-kernel, linux-crypto, herbert
Cc: catalin.marinas, victor.chong, will.deacon, daniel.thompson
In-Reply-To: <1475189503-9175-1-git-send-email-ard.biesheuvel@linaro.org>
> This is a port of the ARMv7 implementation in arch/arm/crypto. For a Cortex-A57
> (r2p1), the performance numbers are listed below. In summary, 40% - 50% speedup
> where it counts, i.e., block sizes over 256 bytes with few updates.
Cool! Great! Just in case for reference. You compare generic, new NEON
and hardware-assisted implementations. I assume that first one refers to
C compiler-generated code. But there is another option, i.e. non-NEON
assembly. Now to the "for reference" part. The reason for why NEON is
not utilized in OpenSSL is because it's deemed that it doesn't provide
"extraordinary" improvement over non-NEON assembly code, especially on
less sophisticated processors such as Cortex-A53. Note that I'm not
saying that NEON SHA256 subroutine is not faster, it is, only that it's
not "extraordinarily" faster in most relevant cases(*). In other words
it's reckoned that non-NEON assembly provides adequate *all-round*
performance, taking into consideration that it does it without being
dependent on optional NEON. Non-NEON assembly should also be interesting
in kernel context, because there are situations when you can't call NEON
procedure, be it suggested one or hardware-assisted, which itself relies
on NEON. And of course another nice quality about SHA2 module in OpenSSL
is that it emits both SHA256 and SHA512 codes ;-) On a related note, it
should be noted that NEON-izing SHA512 on ARM64 makes less sense: it's
bound to provide a smaller improvement than SHA256 [if any at all in some
cases]. This is because in SHA256 you engage 4 lanes of NEON registers,
while in the SHA512 case you have only 2.
(*) Well, this is also a question of priorities. My rationale is that
there is a lot of Cortex-A53 and A57 phones out there that don't have
crypto-extensions, I refer to Qualcomm SoCs, where NEON gives less than
10% improvement [over non-NEON assembly]. Yes, it gives more on X-Gene,
but X-Gene is not wide-spread, and the rest (including upcoming X-Gene)
have crypto-extensions, so alternative code path doesn't matter.
^ permalink raw reply
* [PATCH] padata: add helper function for queue length
From: Jason A. Donenfeld @ 2016-10-02 1:46 UTC (permalink / raw)
To: Steffen Klassert, linux-crypto, linux-kernel; +Cc: Jason A. Donenfeld
Since padata has a maximum number of inflight jobs, currently 1000, it's
very useful to know how many jobs are currently queued up. This adds a
simple helper function to expose this information.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
include/linux/padata.h | 2 ++
kernel/padata.c | 16 ++++++++++++++++
2 files changed, 18 insertions(+)
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 113ee62..4840ae4 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -3,6 +3,7 @@
*
* Copyright (C) 2008, 2009 secunet Security Networks AG
* Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
+ * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -181,4 +182,5 @@ extern int padata_register_cpumask_notifier(struct padata_instance *pinst,
struct notifier_block *nblock);
extern int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
struct notifier_block *nblock);
+extern int padata_queue_len(struct padata_instance *pinst);
#endif
diff --git a/kernel/padata.c b/kernel/padata.c
index 9932788..17c1e08 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -5,6 +5,7 @@
*
* Copyright (C) 2008, 2009 secunet Security Networks AG
* Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
+ * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -1039,3 +1040,18 @@ void padata_free(struct padata_instance *pinst)
kobject_put(&pinst->kobj);
}
EXPORT_SYMBOL(padata_free);
+
+/**
+ * padata_queue_len - retrieve the number of in-progress jobs
+ *
+ * @pinst: padata instance from which to read the queue size
+ */
+int padata_queue_len(struct padata_instance *pinst)
+{
+ int len;
+ rcu_read_lock_bh();
+ len = atomic_read(&rcu_dereference_bh(pinst->pd)->refcnt);
+ rcu_read_unlock_bh();
+ return len;
+}
+EXPORT_SYMBOL(padata_queue_len);
--
2.10.0
^ permalink raw reply related
* Re: [PATCH] crypto: arm64/sha256 - add support for SHA256 using NEON instructions
From: Ard Biesheuvel @ 2016-10-02 2:58 UTC (permalink / raw)
To: linux-arm-kernel@lists.infradead.org,
linux-crypto@vger.kernel.org, Herbert Xu
Cc: Daniel Thompson, Ard Biesheuvel, Catalin Marinas, Will Deacon,
Andy Polyakov, Victor Chong
In-Reply-To: <CAKv+Gu8J9rHA2opKqHQr74GU6xxwMpaJak1mNpRVpOf=+BZ81w@mail.gmail.com>
On 29 September 2016 at 16:37, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 29 September 2016 at 15:51, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
>> This is a port to arm64 of the NEON implementation of SHA256 that lives
>> under arch/arm/crypto.
>>
>> Due to the fact that the AArch64 assembler dialect deviates from the
>> 32-bit ARM one in ways that makes sharing code problematic, and given
>> that this version only uses the NEON version whereas the original
>> implementation supports plain ALU assembler, NEON and Crypto Extensions,
>> this code is built from a version sha256-armv4.pl that has been
>> transliterated to the AArch64 NEON dialect.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>> arch/arm64/crypto/Kconfig | 5 +
>> arch/arm64/crypto/Makefile | 11 +
>> arch/arm64/crypto/sha256-armv4.pl | 413 +++++++++
>> arch/arm64/crypto/sha256-core.S_shipped | 883 ++++++++++++++++++++
>> arch/arm64/crypto/sha256_neon_glue.c | 103 +++
>> 5 files changed, 1415 insertions(+)
>>
>> diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
>> index 2cf32e9887e1..d32371198474 100644
>> --- a/arch/arm64/crypto/Kconfig
>> +++ b/arch/arm64/crypto/Kconfig
>> @@ -18,6 +18,11 @@ config CRYPTO_SHA2_ARM64_CE
>> depends on ARM64 && KERNEL_MODE_NEON
>> select CRYPTO_HASH
>>
>> +config CRYPTO_SHA2_ARM64_NEON
>> + tristate "SHA-224/SHA-256 digest algorithm (ARMv8 NEON)"
>> + depends on ARM64 && KERNEL_MODE_NEON
>> + select CRYPTO_HASH
>> +
>> config CRYPTO_GHASH_ARM64_CE
>> tristate "GHASH (for GCM chaining mode) using ARMv8 Crypto Extensions"
>> depends on ARM64 && KERNEL_MODE_NEON
>> diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
>> index abb79b3cfcfe..5156ebee0488 100644
>> --- a/arch/arm64/crypto/Makefile
>> +++ b/arch/arm64/crypto/Makefile
>> @@ -29,6 +29,9 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o
>> obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o
>> aes-neon-blk-y := aes-glue-neon.o aes-neon.o
>>
>> +obj-$(CONFIG_CRYPTO_SHA2_ARM64_NEON) := sha256-neon.o
>
> There is a typo here that I only spotted just now: this should be += not :=
>
> Herbert, if you're picking this up, could you please fix this at merge
> time? Or do you need me to resend?
>
Please disregard this patch for now. I will follow up with a more
elaborate series for SHA256 on arm64
^ permalink raw reply
* 20071 linux-crypto
From: ccmembership @ 2016-10-02 8:16 UTC (permalink / raw)
To: linux-crypto
[-- Attachment #1: EMAIL_8347552430_linux-crypto.zip --]
[-- Type: application/zip, Size: 4558 bytes --]
^ permalink raw reply
* Re: [PATCH] crypto: testmgr - add guard to dst buffer for ahash_export
From: Herbert Xu @ 2016-10-02 14:37 UTC (permalink / raw)
To: Jan Stancek; +Cc: linux-crypto, linux-kernel, marcelo.cerri
In-Reply-To: <26da11d26bf0d34f9a5896ab9f9540db5be17012.1475072761.git.jstancek@redhat.com>
On Wed, Sep 28, 2016 at 04:38:37PM +0200, Jan Stancek wrote:
> Add a guard to 'state' buffer and warn if its consistency after
> call to crypto_ahash_export() changes, so that any write that
> goes beyond advertised statesize (and thus causing potential
> memory corruption [1]) is more visible.
>
> [1] https://marc.info/?l=linux-crypto-vger&m=147467656516085
>
> Signed-off-by: Jan Stancek <jstancek@redhat.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: Marcelo Cerri <marcelo.cerri@canonical.com>
Patch applied. Thanks.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH] crypto: sha1-powerpc: little-endian support
From: Herbert Xu @ 2016-10-02 14:37 UTC (permalink / raw)
To: Marcelo Cerri
Cc: David S. Miller, linux-crypto, linuxppc-dev, linux-kernel,
Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
George Wilson, Claudio Carvalho, Paulo Flabiano Smorigo,
joy.latten
In-Reply-To: <1474659116-4689-1-git-send-email-marcelo.cerri@canonical.com>
On Fri, Sep 23, 2016 at 04:31:56PM -0300, Marcelo Cerri wrote:
> The driver does not handle endianness properly when loading the input
> data.
>
> Signed-off-by: Marcelo Cerri <marcelo.cerri@canonical.com>
Patch applied. Thanks.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH] crypto: sunxi-ss: mark sun4i_hash() static
From: Herbert Xu @ 2016-10-02 14:37 UTC (permalink / raw)
To: Baoyou Xie
Cc: clabbe.montjoie, davem, maxime.ripard, wens, linux-crypto,
linux-arm-kernel, linux-kernel, arnd, xie.baoyou
In-Reply-To: <1474691326-15541-1-git-send-email-baoyou.xie@linaro.org>
On Sat, Sep 24, 2016 at 12:28:46PM +0800, Baoyou Xie wrote:
> We get 1 warning when building kernel with W=1:
> drivers/crypto/sunxi-ss/sun4i-ss-hash.c:168:5: warning: no previous prototype for 'sun4i_hash' [-Wmissing-prototypes]
>
> In fact, this function is only used in the file in which it is
> declared and don't need a declaration, but can be made static.
> So this patch marks it 'static'.
>
> Signed-off-by: Baoyou Xie <baoyou.xie@linaro.org>
This patch has already been applied.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH v2] crypto: gcm - Fix IV buffer size in crypto_gcm_setkey
From: Herbert Xu @ 2016-10-02 14:38 UTC (permalink / raw)
To: Ondrej Mosnacek; +Cc: linux-crypto
In-Reply-To: <1474620452-7278-1-git-send-email-omosnacek@gmail.com>
On Fri, Sep 23, 2016 at 10:47:32AM +0200, Ondrej Mosnacek wrote:
> The cipher block size for GCM is 16 bytes, and thus the CTR transform
> used in crypto_gcm_setkey() will also expect a 16-byte IV. However,
> the code currently reserves only 8 bytes for the IV, causing
> an out-of-bounds access in the CTR transform. This patch fixes
> the issue by setting the size of the IV buffer to 16 bytes.
>
> Fixes: 84c911523020 ("[CRYPTO] gcm: Add support for async ciphers")
> Signed-off-by: Ondrej Mosnacek <omosnacek@gmail.com>
Patch applied. Thanks.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [BUG] crypto: atmel-aes - erro when compiling with VERBOSE_DEBUG enable
From: Herbert Xu @ 2016-10-02 14:38 UTC (permalink / raw)
To: Cyrille Pitchen; +Cc: levent demir, linux-crypto
In-Reply-To: <de4c3d75-eb4f-f29b-5bde-e2b5ee79c8d9@atmel.com>
On Tue, Sep 27, 2016 at 06:45:18PM +0200, Cyrille Pitchen wrote:
> Hi Levent,
>
> there is a typo in the subject line: erroR.
> Also it would be better to start the summary phrase of the subject line with a
> verb:
>
> crypto: atmel-aes: fix compiler error when VERBOSE_DEBUG is defined
>
> Le 22/09/2016 à 14:45, levent demir a écrit :
> > Fix debug function call in atmel_aes_write
> >
> > Signed-off-by: Levent DEMIR <levent.demir@inria.fr>
> > ---
> > drivers/crypto/atmel-aes.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
> > index e3d40a8..2b0f926 100644
> > --- a/drivers/crypto/atmel-aes.c
> > +++ b/drivers/crypto/atmel-aes.c
> > @@ -317,7 +317,7 @@ static inline void atmel_aes_write(struct
> > atmel_aes_dev *dd,
> > char tmp[16];
> >
> > dev_vdbg(dd->dev, "write 0x%08x into %s\n", value,
> > - atmel_aes_reg_name(offset, tmp));
> > + atmel_aes_reg_name(offset, tmp, sizeof(tmp)));
> It looks like a space has been removed.
It's been completely mangled by the mailer and cannot be applied.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH 0/3] Fix crypto/vmx/p8_ghash memory corruption
From: Herbert Xu @ 2016-10-02 14:40 UTC (permalink / raw)
To: Marcelo Cerri
Cc: linux-crypto, David S. Miller, Paulo Flabiano Smorigo,
Leonidas S. Barbosa, linuxppc-dev, linux-kernel,
Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
George Wilson
In-Reply-To: <1475080931-7926-1-git-send-email-marcelo.cerri@canonical.com>
On Wed, Sep 28, 2016 at 01:42:08PM -0300, Marcelo Cerri wrote:
> This series fixes the memory corruption found by Jan Stancek in 4.8-rc7. The
> problem however also affects previous versions of the driver.
>
> Marcelo Cerri (3):
> crypto: ghash-generic - move common definitions to a new header file
> crypto: vmx - Fix memory corruption caused by p8_ghash
> crypto: vmx - Ensure ghash-generic is enabled
Patches 1 and 2 applied to crypto. I have changed patch 3 to use select
instead of depends on and have applied it to cryptodev.
Thanks.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH 0/3] Fix crypto/vmx/p8_ghash memory corruption
From: Herbert Xu @ 2016-10-02 14:40 UTC (permalink / raw)
To: Anton Blanchard
Cc: Marcelo Cerri, linux-crypto, Leonidas S. Barbosa, linux-kernel,
Paul Mackerras, Paulo Flabiano Smorigo, George Wilson,
linuxppc-dev, David S. Miller
In-Reply-To: <20160929065908.653ec5c2@kryten>
On Thu, Sep 29, 2016 at 06:59:08AM +1000, Anton Blanchard wrote:
> Hi Marcelo
>
> > This series fixes the memory corruption found by Jan Stancek in
> > 4.8-rc7. The problem however also affects previous versions of the
> > driver.
>
> If it affects previous versions, please add the lines in the sign off to
> get it into the stable kernels.
I have added them to patches 1 and 2. Thanks.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH v2 0/2] Minor CCP driver changes
From: Herbert Xu @ 2016-10-02 14:41 UTC (permalink / raw)
To: Gary R Hook; +Cc: linux-crypto, thomas.lendacky, davem
In-Reply-To: <20160928165204.23263.77515.stgit@taos>
On Wed, Sep 28, 2016 at 11:53:31AM -0500, Gary R Hook wrote:
> V2: point a goto statement at the correct label
>
> The following series is for miscellaneous small changes.
Both patches applied. Thanks.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH] crypto: arm64/sha256 - add support for SHA256 using NEON instructions
From: Herbert Xu @ 2016-10-02 14:46 UTC (permalink / raw)
To: Ard Biesheuvel
Cc: linux-arm-kernel@lists.infradead.org,
linux-crypto@vger.kernel.org, Andy Polyakov, Victor Chong,
Daniel Thompson, Will Deacon, Catalin Marinas
In-Reply-To: <CAKv+Gu_M+FcuDSaGsJYjD96=Q1iXyFDD3KWy=4vUbeLWRgrHJw@mail.gmail.com>
On Sat, Oct 01, 2016 at 07:58:56PM -0700, Ard Biesheuvel wrote:
>
> Please disregard this patch for now. I will follow up with a more
> elaborate series for SHA256 on arm64
Thanks for the heads up.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH v2] crypto: caam - treat SGT address pointer as u64
From: Herbert Xu @ 2016-10-02 14:47 UTC (permalink / raw)
To: Tudor Ambarus; +Cc: fabio.estevam, horia.geanta, linux-crypto
In-Reply-To: <1475226579-2078-1-git-send-email-tudor-dan.ambarus@nxp.com>
On Fri, Sep 30, 2016 at 12:09:39PM +0300, Tudor Ambarus wrote:
> Even for i.MX, CAAM is able to use address pointers greater than
> 32 bits, the address pointer field being interpreted as a double word.
> Enforce u64 address pointer in the sec4_sg_entry struct.
>
> This patch fixes the SGT address pointer endianness issue for
> 32bit platforms where core endianness != caam endianness.
>
> Signed-off-by: Tudor Ambarus <tudor-dan.ambarus@nxp.com>
Patch applied. Thanks.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [BUG] crypto: atmel-aes - erro when compiling with VERBOSE_DEBUG enable
From: Cyrille Pitchen @ 2016-10-03 10:20 UTC (permalink / raw)
To: Herbert Xu; +Cc: levent demir, linux-crypto
In-Reply-To: <20161002143858.GE18268@gondor.apana.org.au>
Hi all,
Le 02/10/2016 à 16:38, Herbert Xu a écrit :
> On Tue, Sep 27, 2016 at 06:45:18PM +0200, Cyrille Pitchen wrote:
>> Hi Levent,
>>
>> there is a typo in the subject line: erroR.
>> Also it would be better to start the summary phrase of the subject line with a
>> verb:
>>
>> crypto: atmel-aes: fix compiler error when VERBOSE_DEBUG is defined
>>
>> Le 22/09/2016 à 14:45, levent demir a écrit :
>>> Fix debug function call in atmel_aes_write
>>>
>>> Signed-off-by: Levent DEMIR <levent.demir@inria.fr>
>>> ---
>>> drivers/crypto/atmel-aes.c | 2 +-
>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
>>> index e3d40a8..2b0f926 100644
>>> --- a/drivers/crypto/atmel-aes.c
>>> +++ b/drivers/crypto/atmel-aes.c
>>> @@ -317,7 +317,7 @@ static inline void atmel_aes_write(struct
>>> atmel_aes_dev *dd,
>>> char tmp[16];
>>>
>>> dev_vdbg(dd->dev, "write 0x%08x into %s\n", value,
>>> - atmel_aes_reg_name(offset, tmp));
>>> + atmel_aes_reg_name(offset, tmp, sizeof(tmp)));
>> It looks like a space has been removed.
>
> It's been completely mangled by the mailer and cannot be applied.
>
I've sent a new version in this thread:
https://lkml.org/lkml/2016/9/29/463
I added a Reported-by tag for Levent but if you want to use a Signed-off-by
tag instead, it's fine with me!
Best regards,
Cyrille
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox