* [PATCH V2 8/9] crypto: ccp - Enable support for AES GCM on v5 CCPs
From: Gary R Hook @ 2016-11-04 16:04 UTC (permalink / raw)
To: linux-crypto; +Cc: thomas.lendacky, herbert, davem
In-Reply-To: <20161104160140.18155.75618.stgit@taos>
A version 5 device provides the primitive commands
required for AES GCM. This patch adds support for
en/decryption.
Signed-off-by: Gary R Hook <gary.hook@amd.com>
---
drivers/crypto/ccp/Makefile | 1
drivers/crypto/ccp/ccp-crypto-aes-galois.c | 257 ++++++++++++++++++++++++++++
drivers/crypto/ccp/ccp-crypto-main.c | 12 +
drivers/crypto/ccp/ccp-crypto.h | 14 ++
drivers/crypto/ccp/ccp-dev-v5.c | 2
drivers/crypto/ccp/ccp-dev.h | 1
drivers/crypto/ccp/ccp-ops.c | 252 +++++++++++++++++++++++++++
include/linux/ccp.h | 9 +
8 files changed, 548 insertions(+)
create mode 100644 drivers/crypto/ccp/ccp-crypto-aes-galois.c
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index 23f89b7..fd77225 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -13,4 +13,5 @@ ccp-crypto-objs := ccp-crypto-main.o \
ccp-crypto-aes-cmac.o \
ccp-crypto-aes-xts.o \
ccp-crypto-rsa.o \
+ ccp-crypto-aes-galois.o \
ccp-crypto-sha.o
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
new file mode 100644
index 0000000..8bc18c9
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
@@ -0,0 +1,257 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) AES GCM crypto API support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/internal/aead.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/ctr.h>
+#include <crypto/scatterwalk.h>
+#include <linux/delay.h>
+
+#include "ccp-crypto.h"
+
+#define AES_GCM_IVSIZE 12
+
+static int ccp_aes_gcm_complete(struct crypto_async_request *async_req, int ret)
+{
+ return ret;
+}
+
+static int ccp_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct ccp_ctx *ctx = crypto_aead_ctx(tfm);
+
+ switch (key_len) {
+ case AES_KEYSIZE_128:
+ ctx->u.aes.type = CCP_AES_TYPE_128;
+ break;
+ case AES_KEYSIZE_192:
+ ctx->u.aes.type = CCP_AES_TYPE_192;
+ break;
+ case AES_KEYSIZE_256:
+ ctx->u.aes.type = CCP_AES_TYPE_256;
+ break;
+ default:
+ crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ ctx->u.aes.mode = CCP_AES_MODE_GCM;
+ ctx->u.aes.key_len = key_len;
+
+ memcpy(ctx->u.aes.key, key, key_len);
+ sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
+
+ return 0;
+}
+
+static int ccp_aes_gcm_setauthsize(struct crypto_aead *tfm,
+ unsigned int authsize)
+{
+ return 0;
+}
+
+static int ccp_aes_gcm_crypt(struct aead_request *req, bool encrypt)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct ccp_ctx *ctx = crypto_aead_ctx(tfm);
+ struct ccp_aes_req_ctx *rctx = aead_request_ctx(req);
+ struct scatterlist *iv_sg = NULL;
+ unsigned int iv_len = 0;
+ int i;
+ int ret = 0;
+
+ if (!ctx->u.aes.key_len)
+ return -EINVAL;
+
+ if (ctx->u.aes.mode != CCP_AES_MODE_GCM)
+ return -EINVAL;
+
+ if (!req->iv)
+ return -EINVAL;
+
+ /*
+ * 5 parts:
+ * plaintext/ciphertext input
+ * AAD
+ * key
+ * IV
+ * Destination+tag buffer
+ */
+
+ /* According to the way AES GCM has been implemented here,
+ * per RFC 4106 it seems, the provided IV is fixed at 12 bytes,
+ * occupies the beginning of the IV array. Write a 32-bit
+ * integer after that (bytes 13-16) with a value of "1".
+ */
+ memcpy(rctx->iv, req->iv, AES_GCM_IVSIZE);
+ for (i = 0; i < 3; i++)
+ rctx->iv[i + AES_GCM_IVSIZE] = 0;
+ rctx->iv[AES_BLOCK_SIZE - 1] = 1;
+
+ /* Set up a scatterlist for the IV */
+ iv_sg = &rctx->iv_sg;
+ iv_len = AES_BLOCK_SIZE;
+ sg_init_one(iv_sg, rctx->iv, iv_len);
+
+ /* The AAD + plaintext are concatenated in the src buffer */
+ memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+ INIT_LIST_HEAD(&rctx->cmd.entry);
+ rctx->cmd.engine = CCP_ENGINE_AES;
+ rctx->cmd.u.aes.type = ctx->u.aes.type;
+ rctx->cmd.u.aes.mode = ctx->u.aes.mode;
+ rctx->cmd.u.aes.action =
+ (encrypt) ? CCP_AES_ACTION_ENCRYPT : CCP_AES_ACTION_DECRYPT;
+ rctx->cmd.u.aes.key = &ctx->u.aes.key_sg;
+ rctx->cmd.u.aes.key_len = ctx->u.aes.key_len;
+ rctx->cmd.u.aes.iv = iv_sg;
+ rctx->cmd.u.aes.iv_len = iv_len;
+ rctx->cmd.u.aes.src = req->src;
+ rctx->cmd.u.aes.src_len = req->cryptlen;
+ rctx->cmd.u.aes.aad_len = req->assoclen;
+
+ /* The cipher text + the tag are in the dst buffer */
+ rctx->cmd.u.aes.dst = req->dst;
+
+ ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+
+ return ret;
+}
+
+static int ccp_aes_gcm_encrypt(struct aead_request *req)
+{
+ return ccp_aes_gcm_crypt(req, true);
+}
+
+static int ccp_aes_gcm_decrypt(struct aead_request *req)
+{
+ return ccp_aes_gcm_crypt(req, false);
+}
+
+static int ccp_aes_gcm_cra_init(struct crypto_aead *tfm)
+{
+ struct ccp_ctx *ctx = crypto_aead_ctx(tfm);
+
+ ctx->complete = ccp_aes_gcm_complete;
+ ctx->u.aes.key_len = 0;
+
+ crypto_aead_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx));
+
+ return 0;
+}
+
+static void ccp_aes_gcm_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+static struct aead_alg ccp_aes_gcm_defaults = {
+ .setkey = ccp_aes_gcm_setkey,
+ .setauthsize = ccp_aes_gcm_setauthsize,
+ .encrypt = ccp_aes_gcm_encrypt,
+ .decrypt = ccp_aes_gcm_decrypt,
+ .init = ccp_aes_gcm_cra_init,
+ .ivsize = AES_GCM_IVSIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .base = {
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_KERN_DRIVER_ONLY |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ccp_ctx),
+ .cra_priority = CCP_CRA_PRIORITY,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_exit = ccp_aes_gcm_cra_exit,
+ .cra_module = THIS_MODULE,
+ },
+};
+
+struct ccp_aes_aead_def {
+ enum ccp_aes_mode mode;
+ unsigned int version;
+ const char *name;
+ const char *driver_name;
+ unsigned int blocksize;
+ unsigned int ivsize;
+ struct aead_alg *alg_defaults;
+};
+
+static struct ccp_aes_aead_def aes_aead_algs[] = {
+ {
+ .mode = CCP_AES_MODE_GHASH,
+ .version = CCP_VERSION(5, 0),
+ .name = "gcm(aes)",
+ .driver_name = "gcm-aes-ccp",
+ .blocksize = 1,
+ .ivsize = AES_BLOCK_SIZE,
+ .alg_defaults = &ccp_aes_gcm_defaults,
+ },
+};
+
+static int ccp_register_aes_aead(struct list_head *head,
+ const struct ccp_aes_aead_def *def)
+{
+ struct ccp_crypto_aead *ccp_aead;
+ struct aead_alg *alg;
+ int ret;
+
+ ccp_aead = kzalloc(sizeof(*ccp_aead), GFP_KERNEL);
+ if (!ccp_aead)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&ccp_aead->entry);
+
+ ccp_aead->mode = def->mode;
+
+ /* Copy the defaults and override as necessary */
+ alg = &ccp_aead->alg;
+ *alg = *def->alg_defaults;
+ snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+ snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+ def->driver_name);
+ alg->base.cra_blocksize = def->blocksize;
+ alg->base.cra_ablkcipher.ivsize = def->ivsize;
+
+ ret = crypto_register_aead(alg);
+ if (ret) {
+ pr_err("%s ablkcipher algorithm registration error (%d)\n",
+ alg->base.cra_name, ret);
+ kfree(ccp_aead);
+ return ret;
+ }
+
+ list_add(&ccp_aead->entry, head);
+
+ return 0;
+}
+
+int ccp_register_aes_aeads(struct list_head *head)
+{
+ int i, ret;
+ unsigned int ccpversion = ccp_version();
+
+ for (i = 0; i < ARRAY_SIZE(aes_aead_algs); i++) {
+ if (aes_aead_algs[i].version > ccpversion)
+ continue;
+ ret = ccp_register_aes_aead(head, &aes_aead_algs[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 38d4466..e68012a 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -42,6 +42,7 @@
static LIST_HEAD(hash_algs);
static LIST_HEAD(cipher_algs);
static LIST_HEAD(akcipher_algs);
+static LIST_HEAD(aead_algs);
/* For any tfm, requests for that tfm must be returned on the order
* received. With multiple queues available, the CCP can process more
@@ -341,6 +342,10 @@ static int ccp_register_algs(void)
ret = ccp_register_aes_xts_algs(&cipher_algs);
if (ret)
return ret;
+
+ ret = ccp_register_aes_aeads(&aead_algs);
+ if (ret)
+ return ret;
}
if (!sha_disable) {
@@ -363,6 +368,7 @@ static void ccp_unregister_algs(void)
struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp;
struct ccp_crypto_ablkcipher_alg *ablk_alg, *ablk_tmp;
struct ccp_crypto_akcipher_alg *ak_alg, *ak_tmp;
+ struct ccp_crypto_aead *aead_alg, *aead_tmp;
list_for_each_entry_safe(ahash_alg, ahash_tmp, &hash_algs, entry) {
crypto_unregister_ahash(&ahash_alg->alg);
@@ -381,6 +387,12 @@ static void ccp_unregister_algs(void)
list_del(&ak_alg->entry);
kfree(ak_alg);
}
+
+ list_for_each_entry_safe(aead_alg, aead_tmp, &aead_algs, entry) {
+ crypto_unregister_aead(&aead_alg->alg);
+ list_del(&aead_alg->entry);
+ kfree(aead_alg);
+ }
}
static int ccp_crypto_init(void)
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index 76d8b63..cc5044c 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -19,6 +19,8 @@
#include <linux/ccp.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>
+#include <crypto/internal/aead.h>
+#include <crypto/aead.h>
#include <crypto/ctr.h>
#include <crypto/hash.h>
#include <crypto/sha.h>
@@ -35,6 +37,14 @@ struct ccp_crypto_ablkcipher_alg {
struct crypto_alg alg;
};
+struct ccp_crypto_aead {
+ struct list_head entry;
+
+ u32 mode;
+
+ struct aead_alg alg;
+};
+
struct ccp_crypto_ahash_alg {
struct list_head entry;
@@ -103,6 +113,9 @@ struct ccp_aes_req_ctx {
struct scatterlist iv_sg;
u8 iv[AES_BLOCK_SIZE];
+ struct scatterlist tag_sg;
+ u8 tag[AES_BLOCK_SIZE];
+
/* Fields used for RFC3686 requests */
u8 *rfc3686_info;
u8 rfc3686_iv[AES_BLOCK_SIZE];
@@ -244,6 +257,7 @@ struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table,
int ccp_register_aes_algs(struct list_head *head);
int ccp_register_aes_cmac_algs(struct list_head *head);
int ccp_register_aes_xts_algs(struct list_head *head);
+int ccp_register_aes_aeads(struct list_head *head);
int ccp_register_sha_algs(struct list_head *head);
int ccp_register_rsa_algs(struct list_head *head);
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index b31be75..f386fca 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -289,6 +289,8 @@ static int ccp5_perform_aes(struct ccp_op *op)
CCP_AES_TYPE(&function) = op->u.aes.type;
if (op->u.aes.mode == CCP_AES_MODE_CFB)
CCP_AES_SIZE(&function) = 0x7f;
+ if ((op->u.aes.mode == CCP_AES_MODE_GCTR) && op->eom)
+ CCP_AES_SIZE(&function) = op->u.aes.size;
CCP5_CMD_FUNCTION(&desc) = function.raw;
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index f2e9bcb..1424f69 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -467,6 +467,7 @@ struct ccp_aes_op {
enum ccp_aes_type type;
enum ccp_aes_mode mode;
enum ccp_aes_action action;
+ unsigned int size;
};
struct ccp_xts_aes_op {
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index f7398e9..98f7983 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -600,6 +600,255 @@ static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
return ret;
}
+static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
+ struct ccp_cmd *cmd)
+{
+ struct ccp_aes_engine *aes = &cmd->u.aes;
+ struct ccp_dm_workarea key, ctx, final_wa, tag;
+ struct ccp_data src, dst;
+ struct ccp_data aad;
+ struct ccp_op op;
+
+ unsigned long long *final;
+ unsigned int dm_offset;
+ unsigned int ilen;
+ bool in_place = true; /* Default value */
+ int ret;
+
+ struct scatterlist *p_inp, sg_inp[2];
+ struct scatterlist *p_tag, sg_tag[2];
+ struct scatterlist *p_outp, sg_outp[2];
+ struct scatterlist *p_aad;
+
+ if (!aes->iv)
+ return -EINVAL;
+
+ if (!((aes->key_len == AES_KEYSIZE_128) ||
+ (aes->key_len == AES_KEYSIZE_192) ||
+ (aes->key_len == AES_KEYSIZE_256)))
+ return -EINVAL;
+
+ if (!aes->key) /* Gotta have a key SGL */
+ return -EINVAL;
+
+ /* First, decompose the source buffer into AAD & PT,
+ * and the destination buffer into AAD, CT & tag, or
+ * the input into CT & tag.
+ * It is expected that the input and output SGs will
+ * be valid, even if the AAD and input lengths are 0.
+ */
+ p_aad = aes->src;
+ p_inp = scatterwalk_ffwd(sg_inp, aes->src, aes->aad_len);
+ p_outp = scatterwalk_ffwd(sg_outp, aes->dst, aes->aad_len);
+ if (aes->action == CCP_AES_ACTION_ENCRYPT) {
+ ilen = aes->src_len;
+ p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen);
+ } else {
+ /* Input length for decryption includes tag */
+ ilen = aes->src_len - AES_BLOCK_SIZE;
+ p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen);
+ }
+
+ memset(&op, 0, sizeof(op));
+ op.cmd_q = cmd_q;
+ op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+ op.sb_key = cmd_q->sb_key; /* Pre-allocated */
+ op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
+ op.init = 1;
+ op.u.aes.type = aes->type;
+
+ /* Copy the key to the LSB */
+ ret = ccp_init_dm_workarea(&key, cmd_q,
+ CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
+ DMA_TO_DEVICE);
+ if (ret)
+ return ret;
+
+ dm_offset = CCP_SB_BYTES - aes->key_len;
+ ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+ ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+ CCP_PASSTHRU_BYTESWAP_256BIT);
+ if (ret) {
+ cmd->engine_error = cmd_q->cmd_error;
+ goto e_key;
+ }
+
+ /* Copy the context (IV) to the LSB.
+ * There is an assumption here that the IV is 96 bits in length, plus
+ * a nonce of 32 bits. If no IV is present, use a zeroed buffer.
+ */
+ ret = ccp_init_dm_workarea(&ctx, cmd_q,
+ CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
+ DMA_BIDIRECTIONAL);
+ if (ret)
+ goto e_key;
+
+ dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len;
+ ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+
+ ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+ CCP_PASSTHRU_BYTESWAP_256BIT);
+ if (ret) {
+ cmd->engine_error = cmd_q->cmd_error;
+ goto e_ctx;
+ }
+
+ op.init = 1;
+ if (aes->aad_len > 0) {
+ /* Step 1: Run a GHASH over the Additional Authenticated Data */
+ ret = ccp_init_data(&aad, cmd_q, p_aad, aes->aad_len,
+ AES_BLOCK_SIZE,
+ DMA_TO_DEVICE);
+ if (ret)
+ goto e_ctx;
+
+ op.u.aes.mode = CCP_AES_MODE_GHASH;
+ op.u.aes.action = CCP_AES_GHASHAAD;
+
+ while (aad.sg_wa.bytes_left) {
+ ccp_prepare_data(&aad, NULL, &op, AES_BLOCK_SIZE, true);
+
+ ret = cmd_q->ccp->vdata->perform->aes(&op);
+ if (ret) {
+ cmd->engine_error = cmd_q->cmd_error;
+ goto e_aad;
+ }
+
+ ccp_process_data(&aad, NULL, &op);
+ op.init = 0;
+ }
+ }
+
+ op.u.aes.mode = CCP_AES_MODE_GCTR;
+ op.u.aes.action = aes->action;
+
+ if (ilen > 0) {
+ /* Step 2: Run a GCTR over the plaintext */
+ in_place = (sg_virt(p_inp) == sg_virt(p_outp)) ? true : false;
+
+ ret = ccp_init_data(&src, cmd_q, p_inp, ilen,
+ AES_BLOCK_SIZE,
+ in_place ? DMA_BIDIRECTIONAL
+ : DMA_TO_DEVICE);
+ if (ret)
+ goto e_ctx;
+
+ if (in_place) {
+ dst = src;
+ } else {
+ ret = ccp_init_data(&dst, cmd_q, p_outp, ilen,
+ AES_BLOCK_SIZE, DMA_FROM_DEVICE);
+ if (ret)
+ goto e_src;
+ }
+
+ op.soc = 0;
+ op.eom = 0;
+ op.init = 1;
+ while (src.sg_wa.bytes_left) {
+ ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
+ if (!src.sg_wa.bytes_left) {
+ unsigned int nbytes = aes->src_len
+ % AES_BLOCK_SIZE;
+
+ if (nbytes) {
+ op.eom = 1;
+ op.u.aes.size = (nbytes * 8) - 1;
+ }
+ }
+
+ ret = cmd_q->ccp->vdata->perform->aes(&op);
+ if (ret) {
+ cmd->engine_error = cmd_q->cmd_error;
+ goto e_dst;
+ }
+
+ ccp_process_data(&src, &dst, &op);
+ op.init = 0;
+ }
+ }
+
+ /* Step 3: Update the IV portion of the context with the original IV */
+ ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+ CCP_PASSTHRU_BYTESWAP_256BIT);
+ if (ret) {
+ cmd->engine_error = cmd_q->cmd_error;
+ goto e_dst;
+ }
+
+ ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+
+ ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+ CCP_PASSTHRU_BYTESWAP_256BIT);
+ if (ret) {
+ cmd->engine_error = cmd_q->cmd_error;
+ goto e_dst;
+ }
+
+ /* Step 4: Concatenate the lengths of the AAD and source, and
+ * hash that 16 byte buffer.
+ */
+ ret = ccp_init_dm_workarea(&final_wa, cmd_q, AES_BLOCK_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (ret)
+ goto e_dst;
+ final = (unsigned long long *) final_wa.address;
+ final[0] = cpu_to_be64(aes->aad_len * 8);
+ final[1] = cpu_to_be64(ilen * 8);
+
+ op.u.aes.mode = CCP_AES_MODE_GHASH;
+ op.u.aes.action = CCP_AES_GHASHFINAL;
+ op.src.type = CCP_MEMTYPE_SYSTEM;
+ op.src.u.dma.address = final_wa.dma.address;
+ op.src.u.dma.length = AES_BLOCK_SIZE;
+ op.dst.type = CCP_MEMTYPE_SYSTEM;
+ op.dst.u.dma.address = final_wa.dma.address;
+ op.dst.u.dma.length = AES_BLOCK_SIZE;
+ op.eom = 1;
+ op.u.aes.size = 0;
+ ret = cmd_q->ccp->vdata->perform->aes(&op);
+ if (ret)
+ goto e_dst;
+
+ if (aes->action == CCP_AES_ACTION_ENCRYPT) {
+ /* Put the ciphered tag after the ciphertext. */
+ ccp_get_dm_area(&final_wa, 0, p_tag, 0, AES_BLOCK_SIZE);
+ } else {
+ /* Does this ciphered tag match the input? */
+ ret = ccp_init_dm_workarea(&tag, cmd_q, AES_BLOCK_SIZE,
+ DMA_BIDIRECTIONAL);
+ if (ret)
+ goto e_tag;
+ ccp_set_dm_area(&tag, 0, p_tag, 0, AES_BLOCK_SIZE);
+
+ ret = memcmp(tag.address, final_wa.address, AES_BLOCK_SIZE);
+ ccp_dm_free(&tag);
+ }
+
+e_tag:
+ ccp_dm_free(&final_wa);
+
+e_dst:
+ if (aes->src_len && !in_place)
+ ccp_free_data(&dst, cmd_q);
+
+e_src:
+ if (aes->src_len)
+ ccp_free_data(&src, cmd_q);
+
+e_aad:
+ if (aes->aad_len)
+ ccp_free_data(&aad, cmd_q);
+
+e_ctx:
+ ccp_dm_free(&ctx);
+
+e_key:
+ ccp_dm_free(&key);
+
+ return ret;
+}
+
static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
struct ccp_aes_engine *aes = &cmd->u.aes;
@@ -613,6 +862,9 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
if (aes->mode == CCP_AES_MODE_CMAC)
return ccp_run_aes_cmac_cmd(cmd_q, cmd);
+ if (aes->mode == CCP_AES_MODE_GCM)
+ return ccp_run_aes_gcm_cmd(cmd_q, cmd);
+
if (!((aes->key_len == AES_KEYSIZE_128) ||
(aes->key_len == AES_KEYSIZE_192) ||
(aes->key_len == AES_KEYSIZE_256)))
diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index dd90670..423561a 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -122,6 +122,10 @@ enum ccp_aes_mode {
CCP_AES_MODE_CFB,
CCP_AES_MODE_CTR,
CCP_AES_MODE_CMAC,
+ CCP_AES_MODE_GHASH,
+ CCP_AES_MODE_GCTR,
+ CCP_AES_MODE_GCM,
+ CCP_AES_MODE_GMAC,
CCP_AES_MODE__LAST,
};
@@ -136,6 +140,9 @@ enum ccp_aes_action {
CCP_AES_ACTION_ENCRYPT,
CCP_AES_ACTION__LAST,
};
+/* Overloaded field */
+#define CCP_AES_GHASHAAD CCP_AES_ACTION_DECRYPT
+#define CCP_AES_GHASHFINAL CCP_AES_ACTION_ENCRYPT
/**
* struct ccp_aes_engine - CCP AES operation
@@ -180,6 +187,8 @@ struct ccp_aes_engine {
struct scatterlist *cmac_key; /* K1/K2 cmac key required for
* final cmac cmd */
u32 cmac_key_len; /* In bytes */
+
+ u32 aad_len; /* In bytes */
};
/***** XTS-AES engine *****/
^ permalink raw reply related
* [PATCH V2 9/9] crypto: ccp - Enable 3DES function on v5 CCPs
From: Gary R Hook @ 2016-11-04 16:05 UTC (permalink / raw)
To: linux-crypto; +Cc: thomas.lendacky, herbert, davem
In-Reply-To: <20161104160140.18155.75618.stgit@taos>
Wire up support for Triple DES in ECB mode.
Signed-off-by: Gary R Hook <gary.hook@amd.com>
---
drivers/crypto/ccp/Makefile | 1
drivers/crypto/ccp/ccp-crypto-des3.c | 254 ++++++++++++++++++++++++++++++++++
drivers/crypto/ccp/ccp-crypto-main.c | 10 +
drivers/crypto/ccp/ccp-crypto.h | 20 +++
drivers/crypto/ccp/ccp-dev-v3.c | 1
drivers/crypto/ccp/ccp-dev-v5.c | 54 +++++++
drivers/crypto/ccp/ccp-dev.h | 14 ++
drivers/crypto/ccp/ccp-ops.c | 198 +++++++++++++++++++++++++++
include/linux/ccp.h | 57 +++++++-
9 files changed, 606 insertions(+), 3 deletions(-)
create mode 100644 drivers/crypto/ccp/ccp-crypto-des3.c
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index fd77225..563594a 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -14,4 +14,5 @@ ccp-crypto-objs := ccp-crypto-main.o \
ccp-crypto-aes-xts.o \
ccp-crypto-rsa.o \
ccp-crypto-aes-galois.o \
+ ccp-crypto-des3.o \
ccp-crypto-sha.o
diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c
new file mode 100644
index 0000000..5af7347
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-des3.c
@@ -0,0 +1,254 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) DES3 crypto API support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <ghook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/des.h>
+
+#include "ccp-crypto.h"
+
+static int ccp_des3_complete(struct crypto_async_request *async_req, int ret)
+{
+ struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
+ struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+ struct ccp_des3_req_ctx *rctx = ablkcipher_request_ctx(req);
+
+ if (ret)
+ return ret;
+
+ if (ctx->u.des3.mode != CCP_DES3_MODE_ECB)
+ memcpy(req->info, rctx->iv, DES3_EDE_BLOCK_SIZE);
+
+ return 0;
+}
+
+static int ccp_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
+ struct ccp_crypto_ablkcipher_alg *alg =
+ ccp_crypto_ablkcipher_alg(crypto_ablkcipher_tfm(tfm));
+ u32 *flags = &tfm->base.crt_flags;
+
+
+ /* From des_generic.c:
+ *
+ * RFC2451:
+ * If the first two or last two independent 64-bit keys are
+ * equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
+ * same as DES. Implementers MUST reject keys that exhibit this
+ * property.
+ */
+ const u32 *K = (const u32 *)key;
+
+ if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
+ !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
+ (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+ *flags |= CRYPTO_TFM_RES_WEAK_KEY;
+ return -EINVAL;
+ }
+
+ /* It's not clear that there is any support for a keysize of 112.
+ * If needed, the caller should make K1 == K3
+ */
+ ctx->u.des3.type = CCP_DES3_TYPE_168;
+ ctx->u.des3.mode = alg->mode;
+ ctx->u.des3.key_len = key_len;
+
+ memcpy(ctx->u.des3.key, key, key_len);
+ sg_init_one(&ctx->u.des3.key_sg, ctx->u.des3.key, key_len);
+
+ return 0;
+}
+
+static int ccp_des3_crypt(struct ablkcipher_request *req, bool encrypt)
+{
+ struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+ struct ccp_des3_req_ctx *rctx = ablkcipher_request_ctx(req);
+ struct scatterlist *iv_sg = NULL;
+ unsigned int iv_len = 0;
+ int ret;
+
+ if (!ctx->u.des3.key_len)
+ return -EINVAL;
+
+ if (((ctx->u.des3.mode == CCP_DES3_MODE_ECB) ||
+ (ctx->u.des3.mode == CCP_DES3_MODE_CBC)) &&
+ (req->nbytes & (DES3_EDE_BLOCK_SIZE - 1)))
+ return -EINVAL;
+
+ if (ctx->u.des3.mode != CCP_DES3_MODE_ECB) {
+ if (!req->info)
+ return -EINVAL;
+
+ memcpy(rctx->iv, req->info, DES3_EDE_BLOCK_SIZE);
+ iv_sg = &rctx->iv_sg;
+ iv_len = DES3_EDE_BLOCK_SIZE;
+ sg_init_one(iv_sg, rctx->iv, iv_len);
+ }
+
+ memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+ INIT_LIST_HEAD(&rctx->cmd.entry);
+ rctx->cmd.engine = CCP_ENGINE_DES3;
+ rctx->cmd.u.des3.type = ctx->u.des3.type;
+ rctx->cmd.u.des3.mode = ctx->u.des3.mode;
+ rctx->cmd.u.des3.action = (encrypt)
+ ? CCP_DES3_ACTION_ENCRYPT
+ : CCP_DES3_ACTION_DECRYPT;
+ rctx->cmd.u.des3.key = &ctx->u.des3.key_sg;
+ rctx->cmd.u.des3.key_len = ctx->u.des3.key_len;
+ rctx->cmd.u.des3.iv = iv_sg;
+ rctx->cmd.u.des3.iv_len = iv_len;
+ rctx->cmd.u.des3.src = req->src;
+ rctx->cmd.u.des3.src_len = req->nbytes;
+ rctx->cmd.u.des3.dst = req->dst;
+
+ ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+
+ return ret;
+}
+
+static int ccp_des3_encrypt(struct ablkcipher_request *req)
+{
+ return ccp_des3_crypt(req, true);
+}
+
+static int ccp_des3_decrypt(struct ablkcipher_request *req)
+{
+ return ccp_des3_crypt(req, false);
+}
+
+static int ccp_des3_cra_init(struct crypto_tfm *tfm)
+{
+ struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ ctx->complete = ccp_des3_complete;
+ ctx->u.des3.key_len = 0;
+
+ tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_des3_req_ctx);
+
+ return 0;
+}
+
+static void ccp_des3_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+static struct crypto_alg ccp_des3_defaults = {
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER |
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_KERN_DRIVER_ONLY |
+ CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ccp_ctx),
+ .cra_priority = CCP_CRA_PRIORITY,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_init = ccp_des3_cra_init,
+ .cra_exit = ccp_des3_cra_exit,
+ .cra_module = THIS_MODULE,
+ .cra_ablkcipher = {
+ .setkey = ccp_des3_setkey,
+ .encrypt = ccp_des3_encrypt,
+ .decrypt = ccp_des3_decrypt,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ },
+};
+
+struct ccp_des3_def {
+ enum ccp_des3_mode mode;
+ unsigned int version;
+ const char *name;
+ const char *driver_name;
+ unsigned int blocksize;
+ unsigned int ivsize;
+ struct crypto_alg *alg_defaults;
+};
+
+static struct ccp_des3_def des3_algs[] = {
+ {
+ .mode = CCP_DES3_MODE_ECB,
+ .version = CCP_VERSION(5, 0),
+ .name = "ecb(des3_ede)",
+ .driver_name = "ecb-des3-ccp",
+ .blocksize = DES3_EDE_BLOCK_SIZE,
+ .ivsize = 0,
+ .alg_defaults = &ccp_des3_defaults,
+ },
+ {
+ .mode = CCP_DES3_MODE_CBC,
+ .version = CCP_VERSION(5, 0),
+ .name = "cbc(des3_ede)",
+ .driver_name = "cbc-des3-ccp",
+ .blocksize = DES3_EDE_BLOCK_SIZE,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .alg_defaults = &ccp_des3_defaults,
+ },
+};
+
+static int ccp_register_des3_alg(struct list_head *head,
+ const struct ccp_des3_def *def)
+{
+ struct ccp_crypto_ablkcipher_alg *ccp_alg;
+ struct crypto_alg *alg;
+ int ret;
+
+ ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+ if (!ccp_alg)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&ccp_alg->entry);
+
+ ccp_alg->mode = def->mode;
+
+ /* Copy the defaults and override as necessary */
+ alg = &ccp_alg->alg;
+ *alg = *def->alg_defaults;
+ snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+ snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+ def->driver_name);
+ alg->cra_blocksize = def->blocksize;
+ alg->cra_ablkcipher.ivsize = def->ivsize;
+
+ ret = crypto_register_alg(alg);
+ if (ret) {
+ pr_err("%s ablkcipher algorithm registration error (%d)\n",
+ alg->cra_name, ret);
+ kfree(ccp_alg);
+ return ret;
+ }
+
+ list_add(&ccp_alg->entry, head);
+
+ return 0;
+}
+
+int ccp_register_des3_algs(struct list_head *head)
+{
+ int i, ret;
+ unsigned int ccpversion = ccp_version();
+
+ for (i = 0; i < ARRAY_SIZE(des3_algs); i++) {
+ if (des3_algs[i].version > ccpversion)
+ continue;
+ ret = ccp_register_des3_alg(head, &des3_algs[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index e68012a..f492286 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -38,6 +38,10 @@
module_param(rsa_disable, uint, 0444);
MODULE_PARM_DESC(rsa_disable, "Disable use of RSA - any non-zero value");
+static unsigned int des3_disable;
+module_param(des3_disable, uint, 0444);
+MODULE_PARM_DESC(des3_disable, "Disable use of 3DES - any non-zero value");
+
/* List heads for the supported algorithms */
static LIST_HEAD(hash_algs);
static LIST_HEAD(cipher_algs);
@@ -348,6 +352,12 @@ static int ccp_register_algs(void)
return ret;
}
+ if (!des3_disable) {
+ ret = ccp_register_des3_algs(&cipher_algs);
+ if (ret)
+ return ret;
+ }
+
if (!sha_disable) {
ret = ccp_register_sha_algs(&hash_algs);
if (ret)
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index cc5044c..cce3c9d 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -158,6 +158,24 @@ struct ccp_aes_cmac_exp_ctx {
u8 buf[AES_BLOCK_SIZE];
};
+/***** 3DES related defines *****/
+struct ccp_des3_ctx {
+ enum ccp_engine engine;
+ enum ccp_des3_type type;
+ enum ccp_des3_mode mode;
+
+ struct scatterlist key_sg;
+ unsigned int key_len;
+ u8 key[AES_MAX_KEY_SIZE];
+};
+
+struct ccp_des3_req_ctx {
+ struct scatterlist iv_sg;
+ u8 iv[AES_BLOCK_SIZE];
+
+ struct ccp_cmd cmd;
+};
+
/* SHA-related defines
* These values must be large enough to accommodate any variant
*/
@@ -246,6 +264,7 @@ struct ccp_ctx {
struct ccp_aes_ctx aes;
struct ccp_rsa_ctx rsa;
struct ccp_sha_ctx sha;
+ struct ccp_des3_ctx des3;
} u;
};
@@ -260,5 +279,6 @@ struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table,
int ccp_register_aes_aeads(struct list_head *head);
int ccp_register_sha_algs(struct list_head *head);
int ccp_register_rsa_algs(struct list_head *head);
+int ccp_register_des3_algs(struct list_head *head);
#endif
diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
index 3a55628..6c8c58e 100644
--- a/drivers/crypto/ccp/ccp-dev-v3.c
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -553,6 +553,7 @@ static irqreturn_t ccp_irq_handler(int irq, void *data)
static const struct ccp_actions ccp3_actions = {
.aes = ccp_perform_aes,
.xts_aes = ccp_perform_xts_aes,
+ .des3 = NULL,
.sha = ccp_perform_sha,
.rsa = ccp_perform_rsa,
.passthru = ccp_perform_passthru,
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index f386fca..811668f 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -108,6 +108,12 @@ static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
u16 type:2;
} aes_xts;
struct {
+ u16 size:7;
+ u16 encrypt:1;
+ u16 mode:5;
+ u16 type:2;
+ } des3;
+ struct {
u16 rsvd1:10;
u16 type:4;
u16 rsvd2:1;
@@ -139,6 +145,10 @@ static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
#define CCP_AES_TYPE(p) ((p)->aes.type)
#define CCP_XTS_SIZE(p) ((p)->aes_xts.size)
#define CCP_XTS_ENCRYPT(p) ((p)->aes_xts.encrypt)
+#define CCP_DES3_SIZE(p) ((p)->des3.size)
+#define CCP_DES3_ENCRYPT(p) ((p)->des3.encrypt)
+#define CCP_DES3_MODE(p) ((p)->des3.mode)
+#define CCP_DES3_TYPE(p) ((p)->des3.type)
#define CCP_SHA_TYPE(p) ((p)->sha.type)
#define CCP_RSA_SIZE(p) ((p)->rsa.size)
#define CCP_PT_BYTESWAP(p) ((p)->pt.byteswap)
@@ -391,6 +401,47 @@ static int ccp5_perform_sha(struct ccp_op *op)
return ccp5_do_cmd(&desc, op->cmd_q);
}
+static int ccp5_perform_des3(struct ccp_op *op)
+{
+ struct ccp5_desc desc;
+ union ccp_function function;
+ u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
+
+ /* Zero out all the fields of the command desc */
+ memset(&desc, 0, sizeof(struct ccp5_desc));
+
+ CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_DES3;
+
+ CCP5_CMD_SOC(&desc) = op->soc;
+ CCP5_CMD_IOC(&desc) = 1;
+ CCP5_CMD_INIT(&desc) = op->init;
+ CCP5_CMD_EOM(&desc) = op->eom;
+ CCP5_CMD_PROT(&desc) = 0;
+
+ function.raw = 0;
+ CCP_DES3_ENCRYPT(&function) = op->u.des3.action;
+ CCP_DES3_MODE(&function) = op->u.des3.mode;
+ CCP_DES3_TYPE(&function) = op->u.des3.type;
+ CCP5_CMD_FUNCTION(&desc) = cpu_to_le32(function.raw);
+
+ CCP5_CMD_LEN(&desc) = cpu_to_le32(op->src.u.dma.length);
+
+ CCP5_CMD_SRC_LO(&desc) = cpu_to_le32(ccp_addr_lo(&op->src.u.dma));
+ CCP5_CMD_SRC_HI(&desc) = cpu_to_le32(ccp_addr_hi(&op->src.u.dma));
+ CCP5_CMD_SRC_MEM(&desc) = cpu_to_le32(CCP_MEMTYPE_SYSTEM);
+
+ CCP5_CMD_DST_LO(&desc) = cpu_to_le32(ccp_addr_lo(&op->dst.u.dma));
+ CCP5_CMD_DST_HI(&desc) = cpu_to_le32(ccp_addr_hi(&op->dst.u.dma));
+ CCP5_CMD_DST_MEM(&desc) = cpu_to_le32(CCP_MEMTYPE_SYSTEM);
+
+ CCP5_CMD_KEY_LO(&desc) = cpu_to_le32(lower_32_bits(key_addr));
+ CCP5_CMD_KEY_HI(&desc) = 0;
+ CCP5_CMD_KEY_MEM(&desc) = cpu_to_le32(CCP_MEMTYPE_SB);
+ CCP5_CMD_LSB_ID(&desc) = cpu_to_le32(op->sb_ctx);
+
+ return ccp5_do_cmd(&desc, op->cmd_q);
+}
+
static int ccp5_perform_rsa(struct ccp_op *op)
{
struct ccp5_desc desc;
@@ -438,6 +489,7 @@ static int ccp5_perform_passthru(struct ccp_op *op)
struct ccp_dma_info *saddr = &op->src.u.dma;
struct ccp_dma_info *daddr = &op->dst.u.dma;
+
memset(&desc, 0, Q_DESC_SIZE);
CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
@@ -732,6 +784,7 @@ static int ccp5_init(struct ccp_device *ccp)
dev_dbg(dev, "queue #%u available\n", i);
}
+
if (ccp->cmd_q_count == 0) {
dev_notice(dev, "no command queues available\n");
ret = -EIO;
@@ -998,6 +1051,7 @@ static void ccp5other_config(struct ccp_device *ccp)
.aes = ccp5_perform_aes,
.xts_aes = ccp5_perform_xts_aes,
.sha = ccp5_perform_sha,
+ .des3 = ccp5_perform_des3,
.rsa = ccp5_perform_rsa,
.passthru = ccp5_perform_passthru,
.ecc = ccp5_perform_ecc,
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 1424f69..c25be6c 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -190,6 +190,9 @@
#define CCP_XTS_AES_KEY_SB_COUNT 1
#define CCP_XTS_AES_CTX_SB_COUNT 1
+#define CCP_DES3_KEY_SB_COUNT 1
+#define CCP_DES3_CTX_SB_COUNT 1
+
#define CCP_SHA_SB_COUNT 1
#define CCP_RSA_MAX_WIDTH 4096
@@ -475,6 +478,12 @@ struct ccp_xts_aes_op {
enum ccp_xts_aes_unit_size unit_size;
};
+struct ccp_des3_op {
+ enum ccp_des3_type type;
+ enum ccp_des3_mode mode;
+ enum ccp_des3_action action;
+};
+
struct ccp_sha_op {
enum ccp_sha_type type;
u64 msg_bits;
@@ -512,6 +521,7 @@ struct ccp_op {
union {
struct ccp_aes_op aes;
struct ccp_xts_aes_op xts;
+ struct ccp_des3_op des3;
struct ccp_sha_op sha;
struct ccp_rsa_op rsa;
struct ccp_passthru_op passthru;
@@ -620,13 +630,13 @@ struct ccp5_desc {
struct ccp_actions {
int (*aes)(struct ccp_op *);
int (*xts_aes)(struct ccp_op *);
+ int (*des3)(struct ccp_op *);
int (*sha)(struct ccp_op *);
int (*rsa)(struct ccp_op *);
int (*passthru)(struct ccp_op *);
int (*ecc)(struct ccp_op *);
u32 (*sballoc)(struct ccp_cmd_queue *, unsigned int);
- void (*sbfree)(struct ccp_cmd_queue *, unsigned int,
- unsigned int);
+ void (*sbfree)(struct ccp_cmd_queue *, unsigned int, unsigned int);
unsigned int (*get_free_slots)(struct ccp_cmd_queue *);
int (*init)(struct ccp_device *);
void (*destroy)(struct ccp_device *);
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 98f7983..bdf1147 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -16,6 +16,7 @@
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <crypto/scatterwalk.h>
+#include <crypto/des.h>
#include <linux/ccp.h>
#include "ccp-dev.h"
@@ -1183,6 +1184,200 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
return ret;
}
+static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+ struct ccp_des3_engine *des3 = &cmd->u.des3;
+
+ struct ccp_dm_workarea key, ctx;
+ struct ccp_data src, dst;
+ struct ccp_op op;
+ unsigned int dm_offset;
+ unsigned int len_singlekey;
+ bool in_place = false;
+ int ret;
+
+ /* Error checks */
+ if (!cmd_q->ccp->vdata->perform->des3)
+ return -EINVAL;
+
+ if (des3->key_len != DES3_EDE_KEY_SIZE)
+ return -EINVAL;
+
+ if (((des3->mode == CCP_DES3_MODE_ECB) ||
+ (des3->mode == CCP_DES3_MODE_CBC)) &&
+ (des3->src_len & (DES3_EDE_BLOCK_SIZE - 1)))
+ return -EINVAL;
+
+ if (!des3->key || !des3->src || !des3->dst)
+ return -EINVAL;
+
+ if (des3->mode != CCP_DES3_MODE_ECB) {
+ if (des3->iv_len != DES3_EDE_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (!des3->iv)
+ return -EINVAL;
+ }
+
+ ret = -EIO;
+ /* Zero out all the fields of the command desc */
+ memset(&op, 0, sizeof(op));
+
+ /* Set up the Function field */
+ op.cmd_q = cmd_q;
+ op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+ op.sb_key = cmd_q->sb_key;
+
+ op.init = (des3->mode == CCP_DES3_MODE_ECB) ? 0 : 1;
+ op.u.des3.type = des3->type;
+ op.u.des3.mode = des3->mode;
+ op.u.des3.action = des3->action;
+
+ /*
+ * All supported key sizes fit in a single (32-byte) KSB entry and
+ * (like AES) must be in little endian format. Use the 256-bit byte
+ * swap passthru option to convert from big endian to little endian.
+ */
+ ret = ccp_init_dm_workarea(&key, cmd_q,
+ CCP_DES3_KEY_SB_COUNT * CCP_SB_BYTES,
+ DMA_TO_DEVICE);
+ if (ret)
+ return ret;
+
+ /*
+ * The contents of the key triplet are in the reverse order of what
+ * is required by the engine. Copy the 3 pieces individually to put
+ * them where they belong.
+ */
+ dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */
+
+ len_singlekey = des3->key_len / 3;
+ ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey,
+ des3->key, 0, len_singlekey);
+ ccp_set_dm_area(&key, dm_offset + len_singlekey,
+ des3->key, len_singlekey, len_singlekey);
+ ccp_set_dm_area(&key, dm_offset,
+ des3->key, 2 * len_singlekey, len_singlekey);
+
+ /* Copy the key to the SB */
+ ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+ CCP_PASSTHRU_BYTESWAP_256BIT);
+ if (ret) {
+ cmd->engine_error = cmd_q->cmd_error;
+ goto e_key;
+ }
+
+ /*
+ * The DES3 context fits in a single (32-byte) KSB entry and
+ * must be in little endian format. Use the 256-bit byte swap
+ * passthru option to convert from big endian to little endian.
+ */
+ if (des3->mode != CCP_DES3_MODE_ECB) {
+ u32 load_mode;
+
+ op.sb_ctx = cmd_q->sb_ctx;
+
+ ret = ccp_init_dm_workarea(&ctx, cmd_q,
+ CCP_DES3_CTX_SB_COUNT * CCP_SB_BYTES,
+ DMA_BIDIRECTIONAL);
+ if (ret)
+ goto e_key;
+
+ /* Load the context into the LSB */
+ dm_offset = CCP_SB_BYTES - des3->iv_len;
+ ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0, des3->iv_len);
+
+ if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
+ load_mode = CCP_PASSTHRU_BYTESWAP_NOOP;
+ else
+ load_mode = CCP_PASSTHRU_BYTESWAP_256BIT;
+ ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+ load_mode);
+ if (ret) {
+ cmd->engine_error = cmd_q->cmd_error;
+ goto e_ctx;
+ }
+ }
+
+ /*
+ * Prepare the input and output data workareas. For in-place
+ * operations we need to set the dma direction to BIDIRECTIONAL
+ * and copy the src workarea to the dst workarea.
+ */
+ if (sg_virt(des3->src) == sg_virt(des3->dst))
+ in_place = true;
+
+ ret = ccp_init_data(&src, cmd_q, des3->src, des3->src_len,
+ DES3_EDE_BLOCK_SIZE,
+ in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+ if (ret)
+ goto e_ctx;
+
+ if (in_place)
+ dst = src;
+ else {
+ ret = ccp_init_data(&dst, cmd_q, des3->dst, des3->src_len,
+ DES3_EDE_BLOCK_SIZE, DMA_FROM_DEVICE);
+ if (ret)
+ goto e_src;
+ }
+
+ /* Send data to the CCP DES3 engine */
+ while (src.sg_wa.bytes_left) {
+ ccp_prepare_data(&src, &dst, &op, DES3_EDE_BLOCK_SIZE, true);
+ if (!src.sg_wa.bytes_left) {
+ op.eom = 1;
+
+ /* Since we don't retrieve the context in ECB mode
+ * we have to wait for the operation to complete
+ * on the last piece of data
+ */
+ op.soc = 0;
+ }
+
+ ret = cmd_q->ccp->vdata->perform->des3(&op);
+ if (ret) {
+ cmd->engine_error = cmd_q->cmd_error;
+ goto e_dst;
+ }
+
+ ccp_process_data(&src, &dst, &op);
+ }
+
+ if (des3->mode != CCP_DES3_MODE_ECB) {
+ /* Retrieve the context and make BE */
+ ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+ CCP_PASSTHRU_BYTESWAP_256BIT);
+ if (ret) {
+ cmd->engine_error = cmd_q->cmd_error;
+ goto e_dst;
+ }
+
+ /* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */
+ if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
+ dm_offset = CCP_SB_BYTES - des3->iv_len;
+ else
+ dm_offset = 0;
+ ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0,
+ DES3_EDE_BLOCK_SIZE);
+ }
+e_dst:
+ if (!in_place)
+ ccp_free_data(&dst, cmd_q);
+
+e_src:
+ ccp_free_data(&src, cmd_q);
+
+e_ctx:
+ if (des3->mode != CCP_DES3_MODE_ECB)
+ ccp_dm_free(&ctx);
+
+e_key:
+ ccp_dm_free(&key);
+
+ return ret;
+}
+
static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
struct ccp_sha_engine *sha = &cmd->u.sha;
@@ -2165,6 +2360,9 @@ int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
case CCP_ENGINE_XTS_AES_128:
ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
break;
+ case CCP_ENGINE_DES3:
+ ret = ccp_run_des3_cmd(cmd_q, cmd);
+ break;
case CCP_ENGINE_SHA:
ret = ccp_run_sha_cmd(cmd_q, cmd);
break;
diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index 423561a..985fd03 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -300,6 +300,60 @@ struct ccp_sha_engine {
* final sha cmd */
};
+/***** 3DES engine *****/
+enum ccp_des3_mode {
+ CCP_DES3_MODE_ECB = 0,
+ CCP_DES3_MODE_CBC,
+ CCP_DES3_MODE_CFB,
+ CCP_DES3_MODE__LAST,
+};
+
+enum ccp_des3_type {
+ CCP_DES3_TYPE_168 = 1,
+ CCP_DES3_TYPE__LAST,
+ };
+
+enum ccp_des3_action {
+ CCP_DES3_ACTION_DECRYPT = 0,
+ CCP_DES3_ACTION_ENCRYPT,
+ CCP_DES3_ACTION__LAST,
+};
+
+/**
+ * struct ccp_des3_engine - CCP SHA operation
+ * @type: Type of 3DES operation
+ * @mode: cipher mode
+ * @action: 3DES operation (decrypt/encrypt)
+ * @key: key to be used for this 3DES operation
+ * @key_len: length of key (in bytes)
+ * @iv: IV to be used for this AES operation
+ * @iv_len: length in bytes of iv
+ * @src: input data to be used for this operation
+ * @src_len: length of input data used for this operation (in bytes)
+ * @dst: output data produced by this operation
+ *
+ * Variables required to be set when calling ccp_enqueue_cmd():
+ * - type, mode, action, key, key_len, src, dst, src_len
+ * - iv, iv_len for any mode other than ECB
+ *
+ * The iv variable is used as both input and output. On completion of the
+ * 3DES operation the new IV overwrites the old IV.
+ */
+struct ccp_des3_engine {
+ enum ccp_des3_type type;
+ enum ccp_des3_mode mode;
+ enum ccp_des3_action action;
+
+ struct scatterlist *key;
+ u32 key_len; /* In bytes */
+
+ struct scatterlist *iv;
+ u32 iv_len; /* In bytes */
+
+ struct scatterlist *src, *dst;
+ u64 src_len; /* In bytes */
+};
+
/***** RSA engine *****/
/**
* struct ccp_rsa_engine - CCP RSA operation
@@ -549,7 +603,7 @@ struct ccp_ecc_engine {
enum ccp_engine {
CCP_ENGINE_AES = 0,
CCP_ENGINE_XTS_AES_128,
- CCP_ENGINE_RSVD1,
+ CCP_ENGINE_DES3,
CCP_ENGINE_SHA,
CCP_ENGINE_RSA,
CCP_ENGINE_PASSTHRU,
@@ -597,6 +651,7 @@ struct ccp_cmd {
union {
struct ccp_aes_engine aes;
struct ccp_xts_aes_engine xts;
+ struct ccp_des3_engine des3;
struct ccp_sha_engine sha;
struct ccp_rsa_engine rsa;
struct ccp_passthru_engine passthru;
^ permalink raw reply related
* Re: vmalloced stacks and scatterwalk_map_and_copy()
From: Eric Biggers @ 2016-11-04 17:05 UTC (permalink / raw)
To: Andy Lutomirski
Cc: linux-crypto, Herbert Xu, linux-kernel@vger.kernel.org,
Andrew Lutomirski, linux-mm@kvack.org
In-Reply-To: <CALCETrV=9vXDyQ5F5-bFD4YCn5P_j7jmYj2Tv+DXWH43m31NzA@mail.gmail.com>
On Thu, Nov 03, 2016 at 08:57:49PM -0700, Andy Lutomirski wrote:
>
> The crypto request objects can live on the stack just fine. It's the
> request buffers that need to live elsewhere (or the alternative
> interfaces can be used, or the crypto core code can start using
> something other than scatterlists).
>
There are cases where a crypto operation is done on a buffer embedded in a
request object. The example I'm aware of is in the GCM implementation
(crypto/gcm.c). Basically it needs to encrypt 16 zero bytes prepended with the
actual data, so it fills a buffer in the request object
(crypto_gcm_req_priv_ctx.auth_tag) with zeroes and builds a new scatterlist
which covers both this buffer and the original data scatterlist.
Granted, GCM provides the aead interface not the skcipher interface, and
currently there is no AEAD_REQUEST_ON_STACK() macro like there is a
SKCIPHER_REQUEST_ON_STACK() macro. So maybe no one is creating aead requests on
the stack right now. But it's something to watch out for.
Eric
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply
* 20842 linux-crypto
From: archerrp @ 2016-11-04 17:19 UTC (permalink / raw)
To: linux-crypto
[-- Attachment #1: EMAIL_4418701382465_linux-crypto.zip --]
[-- Type: application/zip, Size: 4119 bytes --]
^ permalink raw reply
* Re: [PATCH] poly1305: generic C can be faster on chips with slow unaligned access
From: Eric Biggers @ 2016-11-04 17:37 UTC (permalink / raw)
To: Jason A. Donenfeld
Cc: David Miller, Herbert Xu, linux-crypto, LKML, Martin Willi,
WireGuard mailing list, René van Dorst
In-Reply-To: <CAHmME9pm4DHuBsE+hoFxnm1B5OWAZ+OyKXzeKDxHtisZpw4ebg@mail.gmail.com>
On Thu, Nov 03, 2016 at 11:20:08PM +0100, Jason A. Donenfeld wrote:
> Hi David,
>
> On Thu, Nov 3, 2016 at 6:08 PM, David Miller <davem@davemloft.net> wrote:
> > In any event no piece of code should be doing 32-bit word reads from
> > addresses like "x + 3" without, at a very minimum, going through the
> > kernel unaligned access handlers.
>
> Excellent point. In otherwords,
>
> ctx->r[0] = (le32_to_cpuvp(key + 0) >> 0) & 0x3ffffff;
> ctx->r[1] = (le32_to_cpuvp(key + 3) >> 2) & 0x3ffff03;
> ctx->r[2] = (le32_to_cpuvp(key + 6) >> 4) & 0x3ffc0ff;
> ctx->r[3] = (le32_to_cpuvp(key + 9) >> 6) & 0x3f03fff;
> ctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff;
>
> should change to:
>
> ctx->r[0] = (le32_to_cpuvp(key + 0) >> 0) & 0x3ffffff;
> ctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03;
> ctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff;
> ctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff;
> ctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff;
>
I agree, and the current code is wrong; but do note that this proposal is
correct for poly1305_setrkey() but not for poly1305_setskey() and
poly1305_blocks(). In the latter two cases, 4-byte alignment of the source
buffer is *not* guaranteed. Although crypto_poly1305_update() will be called
with a 4-byte aligned buffer due to the alignmask set on poly1305_alg, the
algorithm operates on 16-byte blocks and therefore has to buffer partial blocks.
If some number of bytes that is not 0 mod 4 is buffered, then the buffer will
fall out of alignment on the next update call. Hence, get_unaligned_le32() is
actually needed on all the loads, since the buffer will, in general, be of
unknown alignment.
Note: some other shash algorithms have this problem too and do not handle it
correctly. It seems to be a common mistake.
Eric
^ permalink raw reply
* [PATCH] crypto: caam: do not register AES-XTS mode on LP units
From: Sven Ebenfeld @ 2016-11-04 23:17 UTC (permalink / raw)
To: linux-crypto, linux-kernel
Cc: herbert, davem, horia.geanta, cata.vasile, sven.ebenfeld
When using AES-XTS on a Wandboard, we receive a Mode error:
caam_jr 2102000.jr1: 20001311: CCB: desc idx 19: AES: Mode error.
Due to the Security Reference Manual, the Low Power AES units
of the i.MX6 do not support the XTS mode. Therefore we should
try to provide them them in the API.
Signed-off-by: Sven Ebenfeld <sven.ebenfeld@gmail.com>
---
drivers/crypto/caam/caamalg.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 156aad1..f5a63ba 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -4583,6 +4583,15 @@ static int __init caam_algapi_init(void)
if (!aes_inst && (alg_sel == OP_ALG_ALGSEL_AES))
continue;
+ /*
+ * Check support for AES modes not available
+ * on LP devices.
+ */
+ if ((cha_vid & CHA_ID_LS_AES_MASK) == CHA_ID_LS_AES_LP)
+ if ((alg->class1_alg_type & OP_ALG_AAI_MASK) ==
+ OP_ALG_AAI_XTS)
+ continue;
+
t_alg = caam_alg_alloc(alg);
if (IS_ERR(t_alg)) {
err = PTR_ERR(t_alg);
--
2.7.4
^ permalink raw reply related
* Re: [PATCH 1/2] fscrypto: don't use on-stack buffer for filename encryption
From: Kent Overstreet @ 2016-11-05 15:13 UTC (permalink / raw)
To: Eric Biggers
Cc: tytso, richard, linux-f2fs-devel, linux-crypto, luto,
linux-fsdevel, jaegeuk, linux-ext4
In-Reply-To: <1478210582-86338-1-git-send-email-ebiggers@google.com>
On Thu, Nov 03, 2016 at 03:03:01PM -0700, Eric Biggers wrote:
> With the new (in 4.9) option to use a virtually-mapped stack
> (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for
> the scatterlist crypto API because they may not be directly mappable to
> struct page. For short filenames, fname_encrypt() was encrypting a
> stack buffer holding the padded filename. Fix it by encrypting the
> filename in-place in the output buffer, thereby making the temporary
> buffer unnecessary.
>
> This bug could most easily be observed in a CONFIG_DEBUG_SG kernel
> because this allowed the BUG in sg_set_buf() to be triggered.
>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> - alloc_buf = kmalloc(ciphertext_len, GFP_NOFS);
> - if (!alloc_buf)
> - return -ENOMEM;
> - workbuf = alloc_buf;
Vmalloc memory does have struct pages - you just need to use vmalloc_to_page()
instead of virt_to_page. Look at drivers/md/bcache/util.c bch_bio_map() if you
want an example.
It would be better to just fix the sg code to handle vmalloc memory, instead of
adding a kmalloc() that can fail (and an error path that inevitably won't be
tested).
------------------------------------------------------------------------------
Developer Access Program for Intel Xeon Phi Processors
Access to Intel Xeon Phi processor-based developer platforms.
With one year of Intel Parallel Studio XE.
Training and support from Colfax.
Order your platform today. http://sdm.link/xeonphi
^ permalink raw reply
* 44225 linux-crypto
From: agiva @ 2016-11-06 18:30 UTC (permalink / raw)
To: linux-crypto
[-- Attachment #1: EMAIL_4797177399752_linux-crypto.zip --]
[-- Type: application/zip, Size: 4008 bytes --]
^ permalink raw reply
* Re: [PATCH 1/2] fscrypto: don't use on-stack buffer for filename encryption
From: Andy Lutomirski @ 2016-11-07 5:00 UTC (permalink / raw)
To: Kent Overstreet
Cc: linux-crypto, Eric Biggers, Richard Weinberger, jaegeuk,
linux-ext4@vger.kernel.org, linux-f2fs-devel, Linux FS Devel,
Ted Ts'o
In-Reply-To: <20161105151349.e5ap547uno3hfit7@kmo-pixel>
On Nov 5, 2016 8:13 AM, "Kent Overstreet" <kent.overstreet@gmail.com> wrote:
>
> On Thu, Nov 03, 2016 at 03:03:01PM -0700, Eric Biggers wrote:
> > With the new (in 4.9) option to use a virtually-mapped stack
> > (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for
> > the scatterlist crypto API because they may not be directly mappable to
> > struct page. For short filenames, fname_encrypt() was encrypting a
> > stack buffer holding the padded filename. Fix it by encrypting the
> > filename in-place in the output buffer, thereby making the temporary
> > buffer unnecessary.
> >
> > This bug could most easily be observed in a CONFIG_DEBUG_SG kernel
> > because this allowed the BUG in sg_set_buf() to be triggered.
> >
> > Signed-off-by: Eric Biggers <ebiggers@google.com>
>
> > - alloc_buf = kmalloc(ciphertext_len, GFP_NOFS);
> > - if (!alloc_buf)
> > - return -ENOMEM;
> > - workbuf = alloc_buf;
>
> Vmalloc memory does have struct pages - you just need to use vmalloc_to_page()
> instead of virt_to_page. Look at drivers/md/bcache/util.c bch_bio_map() if you
> want an example.
>
> It would be better to just fix the sg code to handle vmalloc memory, instead of
> adding a kmalloc() that can fail (and an error path that inevitably won't be
> tested).
Probably not, because (a) vmalloc_to_page is slow and (b) stack
buffers can span physically noncontiguous pages.
I think it's best to either avoid stack buffers or to teach crypto about kiov.
^ permalink raw reply
* Re: [PATCH 1/2] fscrypto: don't use on-stack buffer for filename encryption
From: Richard Weinberger @ 2016-11-07 13:15 UTC (permalink / raw)
To: Eric Biggers, linux-fsdevel
Cc: linux-ext4, linux-f2fs-devel, linux-crypto, tytso, jaegeuk, luto
In-Reply-To: <1478210582-86338-1-git-send-email-ebiggers@google.com>
On 03.11.2016 23:03, Eric Biggers wrote:
> With the new (in 4.9) option to use a virtually-mapped stack
> (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for
> the scatterlist crypto API because they may not be directly mappable to
> struct page. For short filenames, fname_encrypt() was encrypting a
As Kent and Andy pointed out, they are but here are dragons.
The pages can be non-linear and on platforms with different cache architectures
extra flush operations may be needed.
> stack buffer holding the padded filename. Fix it by encrypting the
> filename in-place in the output buffer, thereby making the temporary
> buffer unnecessary.
>
> This bug could most easily be observed in a CONFIG_DEBUG_SG kernel
> because this allowed the BUG in sg_set_buf() to be triggered.
>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
> fs/crypto/fname.c | 53 +++++++++++++++++++++--------------------------------
> 1 file changed, 21 insertions(+), 32 deletions(-)
>
> diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
> index 9a28133..9b774f4 100644
> --- a/fs/crypto/fname.c
> +++ b/fs/crypto/fname.c
> @@ -39,65 +39,54 @@ static void fname_crypt_complete(struct crypto_async_request *req, int res)
> static int fname_encrypt(struct inode *inode,
> const struct qstr *iname, struct fscrypt_str *oname)
> {
> - u32 ciphertext_len;
> struct skcipher_request *req = NULL;
> DECLARE_FS_COMPLETION_RESULT(ecr);
> struct fscrypt_info *ci = inode->i_crypt_info;
> struct crypto_skcipher *tfm = ci->ci_ctfm;
> int res = 0;
> char iv[FS_CRYPTO_BLOCK_SIZE];
> - struct scatterlist src_sg, dst_sg;
> + struct scatterlist sg;
> int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK);
> - char *workbuf, buf[32], *alloc_buf = NULL;
> - unsigned lim;
> + unsigned int lim;
> + unsigned int cryptlen;
>
> lim = inode->i_sb->s_cop->max_namelen(inode);
> if (iname->len <= 0 || iname->len > lim)
> return -EIO;
>
> - ciphertext_len = max(iname->len, (u32)FS_CRYPTO_BLOCK_SIZE);
> - ciphertext_len = round_up(ciphertext_len, padding);
> - ciphertext_len = min(ciphertext_len, lim);
> + /*
> + * Copy the filename to the output buffer for encrypting in-place and
> + * pad it with the needed number of NUL bytes.
> + */
> + cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE);
> + cryptlen = round_up(cryptlen, padding);
> + cryptlen = min(cryptlen, lim);
> + memcpy(oname->name, iname->name, iname->len);
> + memset(oname->name + iname->len, 0, cryptlen - iname->len);
>
> - if (ciphertext_len <= sizeof(buf)) {
> - workbuf = buf;
> - } else {
> - alloc_buf = kmalloc(ciphertext_len, GFP_NOFS);
> - if (!alloc_buf)
> - return -ENOMEM;
> - workbuf = alloc_buf;
> - }
> + /* Initialize the IV */
> + memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
You can initialize it with iv = {0} at the beginning such that you don't
need the memset here.
> - /* Allocate request */
> + /* Set up the encryption request */
> req = skcipher_request_alloc(tfm, GFP_NOFS);
> if (!req) {
> printk_ratelimited(KERN_ERR
> - "%s: crypto_request_alloc() failed\n", __func__);
> - kfree(alloc_buf);
> + "%s: skcipher_request_alloc() failed\n", __func__);
> return -ENOMEM;
> }
> skcipher_request_set_callback(req,
> CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
> fname_crypt_complete, &ecr);
> + sg_init_one(&sg, oname->name, cryptlen);
> + skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv);
>
> - /* Copy the input */
> - memcpy(workbuf, iname->name, iname->len);
> - if (iname->len < ciphertext_len)
> - memset(workbuf + iname->len, 0, ciphertext_len - iname->len);
> -
> - /* Initialize IV */
> - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
> -
> - /* Create encryption request */
> - sg_init_one(&src_sg, workbuf, ciphertext_len);
> - sg_init_one(&dst_sg, oname->name, ciphertext_len);
> - skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
> + /* Do the encryption */
> res = crypto_skcipher_encrypt(req);
> if (res == -EINPROGRESS || res == -EBUSY) {
> + /* Request is being completed asynchronously; wait for it */
> wait_for_completion(&ecr.completion);
> res = ecr.res;
> }
> - kfree(alloc_buf);
> skcipher_request_free(req);
> if (res < 0) {
> printk_ratelimited(KERN_ERR
> @@ -105,7 +94,7 @@ static int fname_encrypt(struct inode *inode,
> return res;
> }
>
> - oname->len = ciphertext_len;
> + oname->len = cryptlen;
> return 0;
> }
Reviewed-by: Richard Weinberger <richard@nod.at>
Thanks,
//richard
^ permalink raw reply
* Re: [PATCH 2/2] fscrypto: don't use on-stack buffer for key derivation
From: Richard Weinberger @ 2016-11-07 13:22 UTC (permalink / raw)
To: Eric Biggers, linux-fsdevel
Cc: linux-ext4, linux-f2fs-devel, linux-crypto, tytso, jaegeuk, luto
In-Reply-To: <1478210582-86338-2-git-send-email-ebiggers@google.com>
On 03.11.2016 23:03, Eric Biggers wrote:
> With the new (in 4.9) option to use a virtually-mapped stack
> (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for
> the scatterlist crypto API because they may not be directly mappable to
> struct page. get_crypt_info() was using a stack buffer to hold the
See 1/2. :-)
> output from the encryption operation used to derive the per-file key.
> Fix it by using a heap buffer.
>
> This bug could most easily be observed in a CONFIG_DEBUG_SG kernel
> because this allowed the BUG in sg_set_buf() to be triggered.
>
> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
> fs/crypto/keyinfo.c | 16 +++++++++++++---
> 1 file changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
> index 82f0285..67fb6d8 100644
> --- a/fs/crypto/keyinfo.c
> +++ b/fs/crypto/keyinfo.c
> @@ -185,7 +185,7 @@ int get_crypt_info(struct inode *inode)
> struct crypto_skcipher *ctfm;
> const char *cipher_str;
> int keysize;
> - u8 raw_key[FS_MAX_KEY_SIZE];
> + u8 *raw_key = NULL;
> int res;
>
> res = fscrypt_initialize();
> @@ -238,6 +238,15 @@ int get_crypt_info(struct inode *inode)
> if (res)
> goto out;
>
> + /*
> + * This cannot be a stack buffer because it is passed to the scatterlist
> + * crypto API as part of key derivation.
> + */
> + res = -ENOMEM;
> + raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS);
> + if (!raw_key)
> + goto out;
> +
> if (fscrypt_dummy_context_enabled(inode)) {
> memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
> goto got_key;
> @@ -276,7 +285,8 @@ int get_crypt_info(struct inode *inode)
> if (res)
> goto out;
>
> - memzero_explicit(raw_key, sizeof(raw_key));
> + kzfree(raw_key);
> + raw_key = NULL;
> if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) {
> put_crypt_info(crypt_info);
> goto retry;
> @@ -287,7 +297,7 @@ int get_crypt_info(struct inode *inode)
> if (res == -ENOKEY)
> res = 0;
> put_crypt_info(crypt_info);
> - memzero_explicit(raw_key, sizeof(raw_key));
> + kzfree(raw_key);
> return res;
> }
Reviewed-by: Richard Weinberger <richard@nod.at>
Thanks,
//richard
^ permalink raw reply
* Re: [PATCH] crypto: caam: do not register AES-XTS mode on LP units
From: Horia Geanta Neag @ 2016-11-07 7:13 UTC (permalink / raw)
To: Sven Ebenfeld, linux-crypto@vger.kernel.org,
linux-kernel@vger.kernel.org
Cc: herbert@gondor.apana.org.au, davem@davemloft.net, Cata Vasile
In-Reply-To: <1478301447-75861-1-git-send-email-sven.ebenfeld@gmail.com>
On 11/5/2016 1:17 AM, Sven Ebenfeld wrote:
> When using AES-XTS on a Wandboard, we receive a Mode error:
> caam_jr 2102000.jr1: 20001311: CCB: desc idx 19: AES: Mode error.
>
> Due to the Security Reference Manual, the Low Power AES units
s/Due to/According to
> of the i.MX6 do not support the XTS mode. Therefore we should
> try to provide them them in the API.
>
Rephrase: Therefore we mustn't register XTS implementations to Crypto
API in this case.
> Signed-off-by: Sven Ebenfeld <sven.ebenfeld@gmail.com>
Reviewed-by: Horia Geantă <horia.geanta@nxp.com>
Please send the patch to -stable and mention the offending commit:
Cc: <stable@vger.kernel.org> # 4.4+
Fixes: c6415a6016bf "crypto: caam - add support for acipher xts(aes)"
Thanks,
Horia
> ---
> drivers/crypto/caam/caamalg.c | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
> index 156aad1..f5a63ba 100644
> --- a/drivers/crypto/caam/caamalg.c
> +++ b/drivers/crypto/caam/caamalg.c
> @@ -4583,6 +4583,15 @@ static int __init caam_algapi_init(void)
> if (!aes_inst && (alg_sel == OP_ALG_ALGSEL_AES))
> continue;
>
> + /*
> + * Check support for AES modes not available
> + * on LP devices.
> + */
> + if ((cha_vid & CHA_ID_LS_AES_MASK) == CHA_ID_LS_AES_LP)
> + if ((alg->class1_alg_type & OP_ALG_AAI_MASK) ==
> + OP_ALG_AAI_XTS)
> + continue;
> +
> t_alg = caam_alg_alloc(alg);
> if (IS_ERR(t_alg)) {
> err = PTR_ERR(t_alg);
>
^ permalink raw reply
* Re: [PATCH 1/2] fscrypto: don't use on-stack buffer for filename encryption
From: Christoph Hellwig @ 2016-11-07 15:44 UTC (permalink / raw)
To: Kent Overstreet
Cc: Eric Biggers, linux-fsdevel, linux-ext4, linux-f2fs-devel,
linux-crypto, tytso, jaegeuk, richard, luto
In-Reply-To: <20161105151349.e5ap547uno3hfit7@kmo-pixel>
On Sat, Nov 05, 2016 at 07:13:49AM -0800, Kent Overstreet wrote:
> Vmalloc memory does have struct pages - you just need to use vmalloc_to_page()
> instead of virt_to_page. Look at drivers/md/bcache/util.c bch_bio_map() if you
> want an example.
That example seems to be clearly broken on virtually index caches
due to the lack of flush_kernel_vmap_range and
invalidate_kernel_vmap_range calls.
^ permalink raw reply
* [PATCH v2] crypto: caam: do not register AES-XTS mode on LP units
From: Sven Ebenfeld @ 2016-11-07 17:51 UTC (permalink / raw)
To: linux-crypto, linux-kernel, stable, horia.geanta
Cc: herbert, davem, cata.vasile, Sven Ebenfeld
When using AES-XTS on a Wandboard, we receive a Mode error:
caam_jr 2102000.jr1: 20001311: CCB: desc idx 19: AES: Mode error.
According to the Security Reference Manual, the Low Power AES units
of the i.MX6 do not support the XTS mode. Therefore we must not
register XTS implementations in the Crypto API.
Signed-off-by: Sven Ebenfeld <sven.ebenfeld@gmail.com>
Reviewed-by: Horia Geantă <horia.geanta@nxp.com>
Cc: <stable@vger.kernel.org> # 4.4+
Fixes: c6415a6016bf "crypto: caam - add support for acipher xts(aes)"
---
drivers/crypto/caam/caamalg.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 156aad1..f5a63ba 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -4583,6 +4583,15 @@ static int __init caam_algapi_init(void)
if (!aes_inst && (alg_sel == OP_ALG_ALGSEL_AES))
continue;
+ /*
+ * Check support for AES modes not available
+ * on LP devices.
+ */
+ if ((cha_vid & CHA_ID_LS_AES_MASK) == CHA_ID_LS_AES_LP)
+ if ((alg->class1_alg_type & OP_ALG_AAI_MASK) ==
+ OP_ALG_AAI_XTS)
+ continue;
+
t_alg = caam_alg_alloc(alg);
if (IS_ERR(t_alg)) {
err = PTR_ERR(t_alg);
--
2.7.4
^ permalink raw reply related
* AW: [PATCH] crypto: caam: do not register AES-XTS mode on LP units
From: Sven Ebenfeld @ 2016-11-07 17:58 UTC (permalink / raw)
To: 'Horia Geanta Neag', linux-crypto, linux-kernel
Cc: herbert, davem, 'Cata Vasile'
In-Reply-To: <DB4PR04MB08471753F3A712A732373E0598A70@DB4PR04MB0847.eurprd04.prod.outlook.com>
[-- Attachment #1: Type: text/plain, Size: 1278 bytes --]
> -----Ursprüngliche Nachricht-----
> Von: Horia Geanta Neag [mailto:horia.geanta@nxp.com]
> Gesendet: Montag, 7. November 2016 08:14
> An: Sven Ebenfeld <sven.ebenfeld@gmail.com>; linux-
> crypto@vger.kernel.org; linux-kernel@vger.kernel.org
> Cc: herbert@gondor.apana.org.au; davem@davemloft.net; Cata Vasile
> <cata.vasile@nxp.com>
> Betreff: Re: [PATCH] crypto: caam: do not register AES-XTS mode on LP
units
>
> On 11/5/2016 1:17 AM, Sven Ebenfeld wrote:
> > When using AES-XTS on a Wandboard, we receive a Mode error:
> > caam_jr 2102000.jr1: 20001311: CCB: desc idx 19: AES: Mode error.
> >
> > Due to the Security Reference Manual, the Low Power AES units
> s/Due to/According to
>
> > of the i.MX6 do not support the XTS mode. Therefore we should try to
> > provide them them in the API.
> >
> Rephrase: Therefore we mustn't register XTS implementations to Crypto API
> in this case.
>
> > Signed-off-by: Sven Ebenfeld <sven.ebenfeld@gmail.com>
> Reviewed-by: Horia Geantă <horia.geanta@nxp.com>
>
> Please send the patch to -stable and mention the offending commit:
> Cc: <stable@vger.kernel.org> # 4.4+
> Fixes: c6415a6016bf "crypto: caam - add support for acipher xts(aes)"
>
> Thanks,
> Horia
Thanks, I've sent a v2.
Sven
[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 6016 bytes --]
^ permalink raw reply
* Re: [PATCH] poly1305: generic C can be faster on chips with slow unaligned access
From: Jason A. Donenfeld @ 2016-11-07 18:08 UTC (permalink / raw)
To: Eric Biggers
Cc: Herbert Xu, Martin Willi, LKML, linux-crypto, David Miller,
WireGuard mailing list
In-Reply-To: <20161104173723.GB34176@google.com>
Hi Eric,
On Fri, Nov 4, 2016 at 6:37 PM, Eric Biggers <ebiggers@google.com> wrote:
> I agree, and the current code is wrong; but do note that this proposal is
> correct for poly1305_setrkey() but not for poly1305_setskey() and
> poly1305_blocks(). In the latter two cases, 4-byte alignment of the source
> buffer is *not* guaranteed. Although crypto_poly1305_update() will be called
> with a 4-byte aligned buffer due to the alignmask set on poly1305_alg, the
> algorithm operates on 16-byte blocks and therefore has to buffer partial blocks.
> If some number of bytes that is not 0 mod 4 is buffered, then the buffer will
> fall out of alignment on the next update call. Hence, get_unaligned_le32() is
> actually needed on all the loads, since the buffer will, in general, be of
> unknown alignment.
Hmm... The general data flow that strikes me as most pertinent is
something like:
struct sk_buff *skb = get_it_from_somewhere();
skb = skb_share_check(skb, GFP_ATOMIC);
num_frags = skb_cow_data(skb, ..., ...);
struct scatterlist sg[num_frags];
sg_init_table(sg, num_frags);
skb_to_sgvec(skb, sg, ..., ...);
blkcipher_walk_init(&walk, sg, sg, len);
blkcipher_walk_virt_block(&desc, &walk, BLOCK_SIZE);
while (walk.nbytes >= BLOCK_SIZE) {
size_t chunk_len = rounddown(walk.nbytes, BLOCK_SIZE);
poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len);
blkcipher_walk_done(&desc, &walk, walk.nbytes % BLOCK_SIZE);
}
if (walk.nbytes) {
poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes);
blkcipher_walk_done(&desc, &walk, 0);
}
Is your suggestion that that in the final if block, walk.src.virt.addr
might be unaligned? Like in the case of the last fragment being 67
bytes long?
If so, what a hassle. I hope the performance overhead isn't too
awful... I'll resubmit taking into account your suggestions.
By the way -- offlist benchmarks sent to me concluded that using the
unaligned load helpers like David suggested is just as fast as that
handrolled bit magic in the v1.
Regards,
Jason
^ permalink raw reply
* Re: [PATCH] poly1305: generic C can be faster on chips with slow unaligned access
From: Jason A. Donenfeld @ 2016-11-07 18:23 UTC (permalink / raw)
To: Eric Biggers
Cc: Herbert Xu, Martin Willi, LKML, linux-crypto, David Miller,
WireGuard mailing list
In-Reply-To: <CAHmME9oejs+USiGJX2P0TgvnR6XRzV1HYZPrq=c-CjGzq59=NQ@mail.gmail.com>
On Mon, Nov 7, 2016 at 7:08 PM, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> Hmm... The general data flow that strikes me as most pertinent is
> something like:
>
> struct sk_buff *skb = get_it_from_somewhere();
> skb = skb_share_check(skb, GFP_ATOMIC);
> num_frags = skb_cow_data(skb, ..., ...);
> struct scatterlist sg[num_frags];
> sg_init_table(sg, num_frags);
> skb_to_sgvec(skb, sg, ..., ...);
> blkcipher_walk_init(&walk, sg, sg, len);
> blkcipher_walk_virt_block(&desc, &walk, BLOCK_SIZE);
> while (walk.nbytes >= BLOCK_SIZE) {
> size_t chunk_len = rounddown(walk.nbytes, BLOCK_SIZE);
> poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len);
> blkcipher_walk_done(&desc, &walk, walk.nbytes % BLOCK_SIZE);
> }
> if (walk.nbytes) {
> poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes);
> blkcipher_walk_done(&desc, &walk, 0);
> }
>
> Is your suggestion that that in the final if block, walk.src.virt.addr
> might be unaligned? Like in the case of the last fragment being 67
> bytes long?
In fact, I'm not so sure this happens here. In the while loop, each
new walk.src.virt.addr will be aligned to BLOCK_SIZE or be aligned by
virtue of being at the start of a new page. In the subsequent if
block, walk.src.virt.addr will either be
some_aligned_address+BLOCK_SIZE, which will be aligned, or it will be
a start of a new page, which will be aligned.
So what did you have in mind exactly?
I don't think anybody is running code like:
for (size_t i = 0; i < len; i += 17)
poly1305_update(&poly, &buffer[i], 17);
(And if so, those consumers should be fixed.)
^ permalink raw reply
* Re: [PATCH] poly1305: generic C can be faster on chips with slow unaligned access
From: Eric Biggers @ 2016-11-07 18:26 UTC (permalink / raw)
To: Jason A. Donenfeld
Cc: Herbert Xu, Martin Willi, LKML, linux-crypto, David Miller,
WireGuard mailing list
In-Reply-To: <CAHmME9oejs+USiGJX2P0TgvnR6XRzV1HYZPrq=c-CjGzq59=NQ@mail.gmail.com>
On Mon, Nov 07, 2016 at 07:08:22PM +0100, Jason A. Donenfeld wrote:
> Hmm... The general data flow that strikes me as most pertinent is
> something like:
>
> struct sk_buff *skb = get_it_from_somewhere();
> skb = skb_share_check(skb, GFP_ATOMIC);
> num_frags = skb_cow_data(skb, ..., ...);
> struct scatterlist sg[num_frags];
> sg_init_table(sg, num_frags);
> skb_to_sgvec(skb, sg, ..., ...);
> blkcipher_walk_init(&walk, sg, sg, len);
> blkcipher_walk_virt_block(&desc, &walk, BLOCK_SIZE);
> while (walk.nbytes >= BLOCK_SIZE) {
> size_t chunk_len = rounddown(walk.nbytes, BLOCK_SIZE);
> poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len);
> blkcipher_walk_done(&desc, &walk, walk.nbytes % BLOCK_SIZE);
> }
> if (walk.nbytes) {
> poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes);
> blkcipher_walk_done(&desc, &walk, 0);
> }
>
> Is your suggestion that that in the final if block, walk.src.virt.addr
> might be unaligned? Like in the case of the last fragment being 67
> bytes long?
I was not referring to any users in particular, only what users could do. As an
example, if you did crypto_shash_update() with 32, 15, then 17 bytes, and the
underlying algorithm is poly1305-generic, the last block would end up
misaligned. This doesn't appear possible with your pseudocode because it only
passes in multiples of the block size until the very end. However I don't see
it claimed anywhere that shash API users have to do that.
Eric
^ permalink raw reply
* Re: [PATCH] poly1305: generic C can be faster on chips with slow unaligned access
From: Jason A. Donenfeld @ 2016-11-07 19:02 UTC (permalink / raw)
To: Eric Biggers
Cc: Herbert Xu, Martin Willi, LKML, linux-crypto, David Miller,
WireGuard mailing list
In-Reply-To: <20161107182646.GA34388@google.com>
On Mon, Nov 7, 2016 at 7:26 PM, Eric Biggers <ebiggers@google.com> wrote:
>
> I was not referring to any users in particular, only what users could do. As an
> example, if you did crypto_shash_update() with 32, 15, then 17 bytes, and the
> underlying algorithm is poly1305-generic, the last block would end up
> misaligned. This doesn't appear possible with your pseudocode because it only
> passes in multiples of the block size until the very end. However I don't see
> it claimed anywhere that shash API users have to do that.
Actually it appears that crypto/poly1305_generic.c already buffers
incoming blocks to a buffer that definitely looks aligned, to prevent
this condition!
I'll submit a v2 with only the inner unaligned operations changed.
^ permalink raw reply
* [PATCH v2] poly1305: generic C can be faster on chips with slow unaligned access
From: Jason A. Donenfeld @ 2016-11-07 19:12 UTC (permalink / raw)
To: Herbert Xu, David S. Miller, linux-crypto, linux-kernel,
Martin Willi, Eric Biggers, René van Dorst
Cc: Jason A. Donenfeld
In-Reply-To: <20161102175810.18647-1-Jason@zx2c4.com>
By using the unaligned access helpers, we drastically improve
performance on small MIPS routers that have to go through the exception
fix-up handler for these unaligned accesses.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/poly1305_generic.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 2df9835d..0b86f4e 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -66,9 +66,9 @@ static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
{
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
dctx->r[0] = (le32_to_cpuvp(key + 0) >> 0) & 0x3ffffff;
- dctx->r[1] = (le32_to_cpuvp(key + 3) >> 2) & 0x3ffff03;
- dctx->r[2] = (le32_to_cpuvp(key + 6) >> 4) & 0x3ffc0ff;
- dctx->r[3] = (le32_to_cpuvp(key + 9) >> 6) & 0x3f03fff;
+ dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03;
+ dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff;
+ dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff;
dctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff;
}
@@ -138,9 +138,9 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
/* h += m[i] */
h0 += (le32_to_cpuvp(src + 0) >> 0) & 0x3ffffff;
- h1 += (le32_to_cpuvp(src + 3) >> 2) & 0x3ffffff;
- h2 += (le32_to_cpuvp(src + 6) >> 4) & 0x3ffffff;
- h3 += (le32_to_cpuvp(src + 9) >> 6) & 0x3ffffff;
+ h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
+ h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
+ h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
h4 += (le32_to_cpuvp(src + 12) >> 8) | hibit;
/* h *= r */
--
2.10.2
^ permalink raw reply related
* Re: [PATCH] poly1305: generic C can be faster on chips with slow unaligned access
From: Eric Biggers @ 2016-11-07 19:25 UTC (permalink / raw)
To: Jason A. Donenfeld
Cc: Herbert Xu, Martin Willi, LKML, linux-crypto, David Miller,
WireGuard mailing list
In-Reply-To: <CAHmME9rzBST8Lqapzykwf2rUyxLXuyOF0wX+Sy259Qtx9a4YSg@mail.gmail.com>
On Mon, Nov 07, 2016 at 08:02:35PM +0100, Jason A. Donenfeld wrote:
> On Mon, Nov 7, 2016 at 7:26 PM, Eric Biggers <ebiggers@google.com> wrote:
> >
> > I was not referring to any users in particular, only what users could do. As an
> > example, if you did crypto_shash_update() with 32, 15, then 17 bytes, and the
> > underlying algorithm is poly1305-generic, the last block would end up
> > misaligned. This doesn't appear possible with your pseudocode because it only
> > passes in multiples of the block size until the very end. However I don't see
> > it claimed anywhere that shash API users have to do that.
>
> Actually it appears that crypto/poly1305_generic.c already buffers
> incoming blocks to a buffer that definitely looks aligned, to prevent
> this condition!
>
No it does *not* buffer all incoming blocks, which is why the source pointer can
fall out of alignment. Yes, I actually tested this. In fact this situation is
even hit, in both possible places, in the self-tests.
Eric
^ permalink raw reply
* Re: [PATCH] poly1305: generic C can be faster on chips with slow unaligned access
From: Jason A. Donenfeld @ 2016-11-07 19:41 UTC (permalink / raw)
To: Eric Biggers
Cc: Herbert Xu, Martin Willi, LKML, linux-crypto, David Miller,
WireGuard mailing list
In-Reply-To: <20161107192505.GB34388@google.com>
On Mon, Nov 7, 2016 at 8:25 PM, Eric Biggers <ebiggers@google.com> wrote:
> No it does *not* buffer all incoming blocks, which is why the source pointer can
> fall out of alignment. Yes, I actually tested this. In fact this situation is
> even hit, in both possible places, in the self-tests.
Urgh! v3 coming right up...
^ permalink raw reply
* [PATCH v3] poly1305: generic C can be faster on chips with slow unaligned access
From: Jason A. Donenfeld @ 2016-11-07 19:43 UTC (permalink / raw)
To: Herbert Xu, David S. Miller, linux-crypto, linux-kernel,
Martin Willi, Eric Biggers, René van Dorst
Cc: Jason A. Donenfeld
In-Reply-To: <20161107191253.17998-1-Jason@zx2c4.com>
By using the unaligned access helpers, we drastically improve
performance on small MIPS routers that have to go through the exception
fix-up handler for these unaligned accesses.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/poly1305_generic.c | 33 ++++++++++++++-------------------
1 file changed, 14 insertions(+), 19 deletions(-)
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 2df9835d..7a77cfc 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -33,11 +33,6 @@ static inline u32 and(u32 v, u32 mask)
return v & mask;
}
-static inline u32 le32_to_cpuvp(const void *p)
-{
- return le32_to_cpup(p);
-}
-
int crypto_poly1305_init(struct shash_desc *desc)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
@@ -65,19 +60,19 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_setkey);
static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
{
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
- dctx->r[0] = (le32_to_cpuvp(key + 0) >> 0) & 0x3ffffff;
- dctx->r[1] = (le32_to_cpuvp(key + 3) >> 2) & 0x3ffff03;
- dctx->r[2] = (le32_to_cpuvp(key + 6) >> 4) & 0x3ffc0ff;
- dctx->r[3] = (le32_to_cpuvp(key + 9) >> 6) & 0x3f03fff;
- dctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff;
+ dctx->r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff;
+ dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03;
+ dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff;
+ dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff;
+ dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
}
static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
{
- dctx->s[0] = le32_to_cpuvp(key + 0);
- dctx->s[1] = le32_to_cpuvp(key + 4);
- dctx->s[2] = le32_to_cpuvp(key + 8);
- dctx->s[3] = le32_to_cpuvp(key + 12);
+ dctx->s[0] = get_unaligned_le32(key + 0);
+ dctx->s[1] = get_unaligned_le32(key + 4);
+ dctx->s[2] = get_unaligned_le32(key + 8);
+ dctx->s[3] = get_unaligned_le32(key + 12);
}
unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
@@ -137,11 +132,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
/* h += m[i] */
- h0 += (le32_to_cpuvp(src + 0) >> 0) & 0x3ffffff;
- h1 += (le32_to_cpuvp(src + 3) >> 2) & 0x3ffffff;
- h2 += (le32_to_cpuvp(src + 6) >> 4) & 0x3ffffff;
- h3 += (le32_to_cpuvp(src + 9) >> 6) & 0x3ffffff;
- h4 += (le32_to_cpuvp(src + 12) >> 8) | hibit;
+ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
+ h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
+ h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
+ h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
+ h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
/* h *= r */
d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
--
2.10.2
^ permalink raw reply related
* [PATCH v4] poly1305: generic C can be faster on chips with slow unaligned access
From: Jason A. Donenfeld @ 2016-11-07 19:47 UTC (permalink / raw)
To: Herbert Xu, David S. Miller, linux-crypto, linux-kernel,
Martin Willi, Eric Biggers, René van Dorst
Cc: Jason A. Donenfeld
In-Reply-To: <20161107191253.17998-1-Jason@zx2c4.com>
By using the unaligned access helpers, we drastically improve
performance on small MIPS routers that have to go through the exception
fix-up handler for these unaligned accesses.
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
crypto/poly1305_generic.c | 34 +++++++++++++++-------------------
1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 2df9835d..b1c2d57 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -17,6 +17,7 @@
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <asm/unaligned.h>
static inline u64 mlt(u64 a, u64 b)
{
@@ -33,11 +34,6 @@ static inline u32 and(u32 v, u32 mask)
return v & mask;
}
-static inline u32 le32_to_cpuvp(const void *p)
-{
- return le32_to_cpup(p);
-}
-
int crypto_poly1305_init(struct shash_desc *desc)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
@@ -65,19 +61,19 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_setkey);
static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
{
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
- dctx->r[0] = (le32_to_cpuvp(key + 0) >> 0) & 0x3ffffff;
- dctx->r[1] = (le32_to_cpuvp(key + 3) >> 2) & 0x3ffff03;
- dctx->r[2] = (le32_to_cpuvp(key + 6) >> 4) & 0x3ffc0ff;
- dctx->r[3] = (le32_to_cpuvp(key + 9) >> 6) & 0x3f03fff;
- dctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff;
+ dctx->r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff;
+ dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03;
+ dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff;
+ dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff;
+ dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
}
static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
{
- dctx->s[0] = le32_to_cpuvp(key + 0);
- dctx->s[1] = le32_to_cpuvp(key + 4);
- dctx->s[2] = le32_to_cpuvp(key + 8);
- dctx->s[3] = le32_to_cpuvp(key + 12);
+ dctx->s[0] = get_unaligned_le32(key + 0);
+ dctx->s[1] = get_unaligned_le32(key + 4);
+ dctx->s[2] = get_unaligned_le32(key + 8);
+ dctx->s[3] = get_unaligned_le32(key + 12);
}
unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
@@ -137,11 +133,11 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
/* h += m[i] */
- h0 += (le32_to_cpuvp(src + 0) >> 0) & 0x3ffffff;
- h1 += (le32_to_cpuvp(src + 3) >> 2) & 0x3ffffff;
- h2 += (le32_to_cpuvp(src + 6) >> 4) & 0x3ffffff;
- h3 += (le32_to_cpuvp(src + 9) >> 6) & 0x3ffffff;
- h4 += (le32_to_cpuvp(src + 12) >> 8) | hibit;
+ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
+ h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
+ h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
+ h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
+ h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
/* h *= r */
d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
--
2.10.2
^ permalink raw reply related
* Re: [PATCH v4] poly1305: generic C can be faster on chips with slow unaligned access
From: Eric Biggers @ 2016-11-07 20:40 UTC (permalink / raw)
To: Jason A. Donenfeld
Cc: Herbert Xu, David S. Miller, linux-crypto, linux-kernel,
Martin Willi, René van Dorst
In-Reply-To: <20161107194709.20457-1-Jason@zx2c4.com>
On Mon, Nov 07, 2016 at 08:47:09PM +0100, Jason A. Donenfeld wrote:
> By using the unaligned access helpers, we drastically improve
> performance on small MIPS routers that have to go through the exception
> fix-up handler for these unaligned accesses.
>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> ---
Reviewed-by: Eric Biggers <ebiggers@google.com>
Nit: the subject line is a little unclear about what was changed.
"make generic C faster on chips with slow unaligned access" would be better.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox