Linux cryptographic layer development
 help / color / mirror / Atom feed
* [PATCH 7/8] crypto: mediatek - add support to CTR mode
From: Ryder Lee @ 2017-01-20  5:41 UTC (permalink / raw)
  To: Herbert Xu, David S. Miller
  Cc: linux-mediatek, linux-kernel, linux-crypto, linux-arm-kernel,
	Ryder Lee
In-Reply-To: <1484890875-57105-1-git-send-email-ryder.lee@mediatek.com>

This patch adds support to the CTR mode.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
---
 drivers/crypto/mediatek/mtk-aes.c | 151 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 146 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c
index 5e7c3ce..bb5b4ff 100644
--- a/drivers/crypto/mediatek/mtk-aes.c
+++ b/drivers/crypto/mediatek/mtk-aes.c
@@ -23,8 +23,10 @@
 /* AES command token size */
 #define AES_CT_SIZE_ECB		2
 #define AES_CT_SIZE_CBC		3
+#define AES_CT_SIZE_CTR		3
 #define AES_CT_CTRL_HDR		cpu_to_le32(0x00220000)
-/* AES-CBC/ECB command token */
+
+/* AES-CBC/ECB/CTR command token */
 #define AES_CMD0		cpu_to_le32(0x05000000)
 #define AES_CMD1		cpu_to_le32(0x2d060000)
 #define AES_CMD2		cpu_to_le32(0xe4a63806)
@@ -39,13 +41,15 @@
 /* AES transform information word 1 fields */
 #define AES_TFM_ECB		cpu_to_le32(0x0 << 0)
 #define AES_TFM_CBC		cpu_to_le32(0x1 << 0)
-#define AES_TFM_FULL_IV		cpu_to_le32(0xf << 5)
+#define AES_TFM_CTR_LOAD	cpu_to_le32(0x6 << 0)	/* load/reuse counter */
+#define AES_TFM_FULL_IV		cpu_to_le32(0xf << 5)	/* using IV 0-3 */
 
 /* AES flags */
 #define AES_FLAGS_ECB		BIT(0)
 #define AES_FLAGS_CBC		BIT(1)
-#define AES_FLAGS_ENCRYPT	BIT(2)
-#define AES_FLAGS_BUSY		BIT(3)
+#define AES_FLAGS_CTR		BIT(2)
+#define AES_FLAGS_ENCRYPT	BIT(3)
+#define AES_FLAGS_BUSY		BIT(4)
 
 /**
  * Command token(CT) is a set of hardware instructions that
@@ -90,6 +94,15 @@ struct mtk_aes_ctx {
 	struct mtk_aes_base_ctx	base;
 };
 
+struct mtk_aes_ctr_ctx {
+	struct mtk_aes_base_ctx base;
+
+	u32	iv[AES_BLOCK_SIZE / sizeof(u32)];
+	size_t offset;
+	struct scatterlist src[2];
+	struct scatterlist dst[2];
+};
+
 struct mtk_aes_drv {
 	struct list_head dev_list;
 	/* Device list lock */
@@ -332,7 +345,7 @@ static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 	return -EINVAL;
 }
 
-/* Initialize transform information of CBC/ECB mode */
+/* Initialize transform information of CBC/ECB/CTR mode */
 static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes,
 			      size_t len)
 {
@@ -374,6 +387,13 @@ static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes,
 		ctx->tfm.ctrl[1] = AES_TFM_ECB;
 
 		ctx->ct_size = AES_CT_SIZE_ECB;
+	} else if (aes->flags & AES_FLAGS_CTR) {
+		ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen +
+				    SIZE_IN_WORDS(AES_BLOCK_SIZE));
+		ctx->tfm.ctrl[1] = AES_TFM_CTR_LOAD | AES_TFM_FULL_IV;
+
+		ctx->ct.cmd[2] = AES_CMD2;
+		ctx->ct_size = AES_CT_SIZE_CTR;
 	}
 }
 
@@ -479,6 +499,80 @@ static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 	return mtk_aes_dma(cryp, aes, req->src, req->dst, req->nbytes);
 }
 
+static inline struct mtk_aes_ctr_ctx *
+mtk_aes_ctr_ctx_cast(struct mtk_aes_base_ctx *ctx)
+{
+	return container_of(ctx, struct mtk_aes_ctr_ctx, base);
+}
+
+static int mtk_aes_ctr_transfer(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
+{
+	struct mtk_aes_base_ctx *ctx = aes->ctx;
+	struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(ctx);
+	struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq);
+	struct scatterlist *src, *dst;
+	int i;
+	u32 start, end, ctr, blocks, *iv_state;
+	size_t datalen;
+	bool fragmented = false;
+
+	/* Check for transfer completion. */
+	cctx->offset += aes->total;
+	if (cctx->offset >= req->nbytes)
+		return mtk_aes_complete(cryp, aes);
+
+	/* Compute data length. */
+	datalen = req->nbytes - cctx->offset;
+	blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE);
+	ctr = be32_to_cpu(cctx->iv[3]);
+
+	/* Check 32bit counter overflow. */
+	start = ctr;
+	end = start + blocks - 1;
+	if (end < start) {
+		ctr |= 0xffffffff;
+		datalen = AES_BLOCK_SIZE * -start;
+		fragmented = true;
+	}
+
+	/* Jump to offset. */
+	src = scatterwalk_ffwd(cctx->src, req->src, cctx->offset);
+	dst = ((req->src == req->dst) ? src :
+	       scatterwalk_ffwd(cctx->dst, req->dst, cctx->offset));
+
+	/* Write IVs into transform state buffer. */
+	iv_state = ctx->tfm.state + ctx->keylen;
+	for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++)
+		iv_state[i] = cpu_to_le32(cctx->iv[i]);
+
+	if (unlikely(fragmented)) {
+	/*
+	 * Increment the counter manually to cope with the hardware
+	 * counter overflow.
+	 */
+		cctx->iv[3] = cpu_to_be32(ctr);
+		crypto_inc((u8 *)cctx->iv, AES_BLOCK_SIZE);
+	}
+	aes->resume = mtk_aes_ctr_transfer;
+
+	return mtk_aes_dma(cryp, aes, src, dst, datalen);
+}
+
+static int mtk_aes_ctr_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
+{
+	struct mtk_aes_ctr_ctx *cctx = mtk_aes_ctr_ctx_cast(aes->ctx);
+	struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq);
+	struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req);
+
+	mtk_aes_set_mode(aes, rctx);
+
+	memcpy(cctx->iv, req->info, AES_BLOCK_SIZE);
+	cctx->offset = 0;
+	aes->total = 0;
+
+	return mtk_aes_ctr_transfer(cryp, aes);
+}
+
 /* Check and set the AES key to transform state buffer */
 static int mtk_aes_setkey(struct crypto_ablkcipher *tfm,
 			  const u8 *key, u32 keylen)
@@ -536,6 +630,16 @@ static int mtk_aes_cbc_decrypt(struct ablkcipher_request *req)
 	return mtk_aes_crypt(req, AES_FLAGS_CBC);
 }
 
+static int mtk_aes_ctr_encrypt(struct ablkcipher_request *req)
+{
+	return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CTR);
+}
+
+static int mtk_aes_ctr_decrypt(struct ablkcipher_request *req)
+{
+	return mtk_aes_crypt(req, AES_FLAGS_CTR);
+}
+
 static int mtk_aes_cra_init(struct crypto_tfm *tfm)
 {
 	struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -552,6 +656,22 @@ static int mtk_aes_cra_init(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static int mtk_aes_ctr_cra_init(struct crypto_tfm *tfm)
+{
+	struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct mtk_cryp *cryp = NULL;
+
+	cryp = mtk_aes_find_dev(&ctx->base);
+	if (!cryp) {
+		pr_err("can't find crypto device\n");
+		return -ENODEV;
+	}
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx);
+	ctx->base.start = mtk_aes_ctr_start;
+	return 0;
+}
+
 static struct crypto_alg aes_algs[] = {
 {
 	.cra_name		= "cbc(aes)",
@@ -594,6 +714,27 @@ static int mtk_aes_cra_init(struct crypto_tfm *tfm)
 		.decrypt	= mtk_aes_ecb_decrypt,
 	}
 },
+{
+	.cra_name		= "ctr(aes)",
+	.cra_driver_name	= "ctr-aes-mtk",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_init		= mtk_aes_ctr_cra_init,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct mtk_aes_ctr_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= mtk_aes_setkey,
+		.encrypt	= mtk_aes_ctr_encrypt,
+		.decrypt	= mtk_aes_ctr_decrypt,
+	}
+},
 };
 
 static void mtk_aes_enc_task(unsigned long data)
-- 
1.9.1

^ permalink raw reply related

* [PATCH 5/8] crypto: mediatek - regroup functions by usage
From: Ryder Lee @ 2017-01-20  5:41 UTC (permalink / raw)
  To: Herbert Xu, David S. Miller
  Cc: linux-mediatek, linux-kernel, linux-crypto, linux-arm-kernel,
	Ryder Lee
In-Reply-To: <1484890875-57105-1-git-send-email-ryder.lee@mediatek.com>

This patch only regroup functions by usage.
This will help to integrate the GCM support patch later by
adjusting some shared code section, such as common code which
will be reused by GCM, AES mode setting, and DMA transfer.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
---
 drivers/crypto/mediatek/mtk-aes.c | 272 ++++++++++++++++++++------------------
 1 file changed, 141 insertions(+), 131 deletions(-)

diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c
index 9c4e468..b5946e9 100644
--- a/drivers/crypto/mediatek/mtk-aes.c
+++ b/drivers/crypto/mediatek/mtk-aes.c
@@ -42,7 +42,6 @@
 #define AES_TFM_FULL_IV		cpu_to_le32(0xf << 5)
 
 /* AES flags */
-#define AES_FLAGS_MODE_MSK	0x7
 #define AES_FLAGS_ECB		BIT(0)
 #define AES_FLAGS_CBC		BIT(1)
 #define AES_FLAGS_ENCRYPT	BIT(2)
@@ -170,65 +169,28 @@ static bool mtk_aes_check_aligned(struct scatterlist *sg, size_t len,
 	return false;
 }
 
-/* Initialize and map transform information of AES */
-static int mtk_aes_info_map(struct mtk_cryp *cryp,
-			    struct mtk_aes_rec *aes,
-			    size_t len)
+static inline void mtk_aes_set_mode(struct mtk_aes_rec *aes,
+				    const struct mtk_aes_reqctx *rctx)
 {
-	struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq);
-	struct mtk_aes_base_ctx *ctx = aes->ctx;
-
-	ctx->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len);
-	ctx->ct.cmd[0] = AES_CMD0 | cpu_to_le32(len);
-	ctx->ct.cmd[1] = AES_CMD1;
-
-	if (aes->flags & AES_FLAGS_ENCRYPT)
-		ctx->tfm.ctrl[0] = AES_TFM_BASIC_OUT;
-	else
-		ctx->tfm.ctrl[0] = AES_TFM_BASIC_IN;
-
-	if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128))
-		ctx->tfm.ctrl[0] |= AES_TFM_128BITS;
-	else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256))
-		ctx->tfm.ctrl[0] |= AES_TFM_256BITS;
-	else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_192))
-		ctx->tfm.ctrl[0] |= AES_TFM_192BITS;
-
-	if (aes->flags & AES_FLAGS_CBC) {
-		const u32 *iv = (const u32 *)req->info;
-		u32 *iv_state = ctx->tfm.state + ctx->keylen;
-		int i;
-
-		ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen +
-				  SIZE_IN_WORDS(AES_BLOCK_SIZE));
-		ctx->tfm.ctrl[1] = AES_TFM_CBC | AES_TFM_FULL_IV;
-
-		for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++)
-			iv_state[i] = cpu_to_le32(iv[i]);
+	/* Clear all but persistent flags and set request flags. */
+	aes->flags = (aes->flags & AES_FLAGS_BUSY) | rctx->mode;
+}
 
-		ctx->ct.cmd[2] = AES_CMD2;
-		ctx->ct_size  = AES_CT_SIZE_CBC;
-	} else if (aes->flags & AES_FLAGS_ECB) {
-		ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen);
-		ctx->tfm.ctrl[1] = AES_TFM_ECB;
+static inline void mtk_aes_restore_sg(const struct mtk_aes_dma *dma)
+{
+	struct scatterlist *sg = dma->sg;
+	int nents = dma->nents;
 
-		ctx->ct_size = AES_CT_SIZE_ECB;
-	}
+	if (!dma->remainder)
+		return;
 
-	ctx->ct_dma = dma_map_single(cryp->dev, &ctx->ct, sizeof(ctx->ct),
-				     DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma)))
-		return -EINVAL;
+	while (--nents > 0 && sg)
+		sg = sg_next(sg);
 
-	ctx->tfm_dma = dma_map_single(cryp->dev, &ctx->tfm, sizeof(ctx->tfm),
-				      DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(cryp->dev, ctx->tfm_dma))) {
-		dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm),
-				 DMA_TO_DEVICE);
-		return -EINVAL;
-	}
+	if (!sg)
+		return;
 
-	return 0;
+	sg->length += dma->remainder;
 }
 
 /*
@@ -288,24 +250,134 @@ static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 	return -EINPROGRESS;
 }
 
-static inline void mtk_aes_restore_sg(const struct mtk_aes_dma *dma)
+static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 {
-	struct scatterlist *sg = dma->sg;
-	int nents = dma->nents;
+	struct mtk_aes_base_ctx *ctx = aes->ctx;
 
-	if (!dma->remainder)
-		return;
+	dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct),
+			 DMA_TO_DEVICE);
+	dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm),
+			 DMA_TO_DEVICE);
 
-	while (--nents > 0 && sg)
-		sg = sg_next(sg);
+	if (aes->src.sg == aes->dst.sg) {
+		dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents,
+			     DMA_BIDIRECTIONAL);
 
-	if (!sg)
-		return;
+		if (aes->src.sg != &aes->aligned_sg)
+			mtk_aes_restore_sg(&aes->src);
+	} else {
+		dma_unmap_sg(cryp->dev, aes->dst.sg, aes->dst.nents,
+			     DMA_FROM_DEVICE);
 
-	sg->length += dma->remainder;
+		if (aes->dst.sg != &aes->aligned_sg)
+			mtk_aes_restore_sg(&aes->dst);
+
+		dma_unmap_sg(cryp->dev, aes->src.sg, aes->src.nents,
+			     DMA_TO_DEVICE);
+
+		if (aes->src.sg != &aes->aligned_sg)
+			mtk_aes_restore_sg(&aes->src);
+	}
+
+	if (aes->dst.sg == &aes->aligned_sg)
+		sg_copy_from_buffer(aes->real_dst, sg_nents(aes->real_dst),
+				    aes->buf, aes->total);
 }
 
-static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes,
+static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
+{
+	struct mtk_aes_base_ctx *ctx = aes->ctx;
+
+	ctx->ct_dma = dma_map_single(cryp->dev, &ctx->ct, sizeof(ctx->ct),
+				     DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(cryp->dev, ctx->ct_dma)))
+		return -EINVAL;
+
+	ctx->tfm_dma = dma_map_single(cryp->dev, &ctx->tfm, sizeof(ctx->tfm),
+				      DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(cryp->dev, ctx->tfm_dma)))
+		goto tfm_map_err;
+
+	if (aes->src.sg == aes->dst.sg) {
+		aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg,
+					     aes->src.nents,
+					     DMA_BIDIRECTIONAL);
+		aes->dst.sg_len = aes->src.sg_len;
+		if (unlikely(!aes->src.sg_len))
+			goto sg_map_err;
+	} else {
+		aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg,
+					     aes->src.nents, DMA_TO_DEVICE);
+		if (unlikely(!aes->src.sg_len))
+			goto sg_map_err;
+
+		aes->dst.sg_len = dma_map_sg(cryp->dev, aes->dst.sg,
+					     aes->dst.nents, DMA_FROM_DEVICE);
+		if (unlikely(!aes->dst.sg_len)) {
+			dma_unmap_sg(cryp->dev, aes->src.sg,
+				     aes->src.nents, DMA_TO_DEVICE);
+			goto sg_map_err;
+		}
+	}
+
+	return mtk_aes_xmit(cryp, aes);
+
+sg_map_err:
+	dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm),
+			 DMA_TO_DEVICE);
+tfm_map_err:
+	dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct),
+			 DMA_TO_DEVICE);
+
+	return -EINVAL;
+}
+
+/* Initialize transform information of CBC/ECB mode */
+static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes,
+			      size_t len)
+{
+	struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq);
+	struct mtk_aes_base_ctx *ctx = aes->ctx;
+
+	ctx->ct_hdr = AES_CT_CTRL_HDR | cpu_to_le32(len);
+	ctx->ct.cmd[0] = AES_CMD0 | cpu_to_le32(len);
+	ctx->ct.cmd[1] = AES_CMD1;
+
+	if (aes->flags & AES_FLAGS_ENCRYPT)
+		ctx->tfm.ctrl[0] = AES_TFM_BASIC_OUT;
+	else
+		ctx->tfm.ctrl[0] = AES_TFM_BASIC_IN;
+
+	if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128))
+		ctx->tfm.ctrl[0] |= AES_TFM_128BITS;
+	else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256))
+		ctx->tfm.ctrl[0] |= AES_TFM_256BITS;
+	else
+		ctx->tfm.ctrl[0] |= AES_TFM_192BITS;
+
+	if (aes->flags & AES_FLAGS_CBC) {
+		const u32 *iv = (const u32 *)req->info;
+		u32 *iv_state = ctx->tfm.state + ctx->keylen;
+		int i;
+
+		ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen +
+				    SIZE_IN_WORDS(AES_BLOCK_SIZE));
+		ctx->tfm.ctrl[1] = AES_TFM_CBC | AES_TFM_FULL_IV;
+
+		for (i = 0; i < SIZE_IN_WORDS(AES_BLOCK_SIZE); i++)
+			iv_state[i] = cpu_to_le32(iv[i]);
+
+		ctx->ct.cmd[2] = AES_CMD2;
+		ctx->ct_size = AES_CT_SIZE_CBC;
+	} else if (aes->flags & AES_FLAGS_ECB) {
+		ctx->tfm.ctrl[0] |= AES_TFM_SIZE(ctx->keylen);
+		ctx->tfm.ctrl[1] = AES_TFM_ECB;
+
+		ctx->ct_size = AES_CT_SIZE_ECB;
+	}
+}
+
+static int mtk_aes_dma(struct mtk_cryp *cryp, struct mtk_aes_rec *aes,
 		       struct scatterlist *src, struct scatterlist *dst,
 		       size_t len)
 {
@@ -346,28 +418,9 @@ static int mtk_aes_map(struct mtk_cryp *cryp, struct mtk_aes_rec *aes,
 		sg_set_buf(&aes->aligned_sg, aes->buf, len + padlen);
 	}
 
-	if (aes->src.sg == aes->dst.sg) {
-		aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg,
-				aes->src.nents, DMA_BIDIRECTIONAL);
-		aes->dst.sg_len = aes->src.sg_len;
-		if (unlikely(!aes->src.sg_len))
-			return -EFAULT;
-	} else {
-		aes->src.sg_len = dma_map_sg(cryp->dev, aes->src.sg,
-				aes->src.nents, DMA_TO_DEVICE);
-		if (unlikely(!aes->src.sg_len))
-			return -EFAULT;
-
-		aes->dst.sg_len = dma_map_sg(cryp->dev, aes->dst.sg,
-				aes->dst.nents, DMA_FROM_DEVICE);
-		if (unlikely(!aes->dst.sg_len)) {
-			dma_unmap_sg(cryp->dev, aes->src.sg,
-				     aes->src.nents, DMA_TO_DEVICE);
-			return -EFAULT;
-		}
-	}
+	mtk_aes_info_init(cryp, aes, len + padlen);
 
-	return mtk_aes_info_map(cryp, aes, len + padlen);
+	return mtk_aes_map(cryp, aes);
 }
 
 static int mtk_aes_handle_queue(struct mtk_cryp *cryp, u8 id,
@@ -419,54 +472,11 @@ static int mtk_aes_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 {
 	struct ablkcipher_request *req = ablkcipher_request_cast(aes->areq);
 	struct mtk_aes_reqctx *rctx = ablkcipher_request_ctx(req);
-	int err;
-
-	rctx = ablkcipher_request_ctx(req);
-	rctx->mode &= AES_FLAGS_MODE_MSK;
-	aes->flags = (aes->flags & ~AES_FLAGS_MODE_MSK) | rctx->mode;
 
+	mtk_aes_set_mode(aes, rctx);
 	aes->resume = mtk_aes_complete;
 
-	err = mtk_aes_map(cryp, aes, req->src, req->dst, req->nbytes);
-	if (err)
-		return err;
-
-	return mtk_aes_xmit(cryp, aes);
-}
-
-static void mtk_aes_unmap(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
-{
-	struct mtk_aes_base_ctx *ctx = aes->ctx;
-
-	dma_unmap_single(cryp->dev, ctx->ct_dma, sizeof(ctx->ct),
-			 DMA_TO_DEVICE);
-	dma_unmap_single(cryp->dev, ctx->tfm_dma, sizeof(ctx->tfm),
-			 DMA_TO_DEVICE);
-
-	if (aes->src.sg == aes->dst.sg) {
-		dma_unmap_sg(cryp->dev, aes->src.sg,
-			     aes->src.nents, DMA_BIDIRECTIONAL);
-
-		if (aes->src.sg != &aes->aligned_sg)
-			mtk_aes_restore_sg(&aes->src);
-	} else {
-		dma_unmap_sg(cryp->dev, aes->dst.sg,
-			     aes->dst.nents, DMA_FROM_DEVICE);
-
-		if (aes->dst.sg != &aes->aligned_sg)
-			mtk_aes_restore_sg(&aes->dst);
-
-		dma_unmap_sg(cryp->dev, aes->src.sg,
-			     aes->src.nents, DMA_TO_DEVICE);
-
-		if (aes->src.sg != &aes->aligned_sg)
-			mtk_aes_restore_sg(&aes->src);
-	}
-
-	if (aes->dst.sg == &aes->aligned_sg)
-		sg_copy_from_buffer(aes->real_dst,
-				    sg_nents(aes->real_dst),
-				    aes->buf, aes->total);
+	return mtk_aes_dma(cryp, aes, req->src, req->dst, req->nbytes);
 }
 
 /* Check and set the AES key to transform state buffer */
-- 
1.9.1

^ permalink raw reply related

* [PATCH 8/8] crypto: mediatek - add support to GCM mode
From: Ryder Lee @ 2017-01-20  5:41 UTC (permalink / raw)
  To: Herbert Xu, David S. Miller
  Cc: linux-mediatek, linux-kernel, linux-crypto, linux-arm-kernel,
	Ryder Lee
In-Reply-To: <1484890875-57105-1-git-send-email-ryder.lee@mediatek.com>

This patch adds support to the GCM mode.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
---
 drivers/crypto/Kconfig                 |   2 +
 drivers/crypto/mediatek/mtk-aes.c      | 369 ++++++++++++++++++++++++++++++++-
 drivers/crypto/mediatek/mtk-platform.h |   2 +
 3 files changed, 369 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index ee5057a..bf7da55 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -557,7 +557,9 @@ config CRYPTO_DEV_MEDIATEK
 	tristate "MediaTek's EIP97 Cryptographic Engine driver"
 	depends on (ARM && ARCH_MEDIATEK) || COMPILE_TEST
 	select CRYPTO_AES
+	select CRYPTO_AEAD
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_CTR
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
 	select CRYPTO_SHA512
diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c
index bb5b4ff..3a47cdb 100644
--- a/drivers/crypto/mediatek/mtk-aes.c
+++ b/drivers/crypto/mediatek/mtk-aes.c
@@ -24,16 +24,28 @@
 #define AES_CT_SIZE_ECB		2
 #define AES_CT_SIZE_CBC		3
 #define AES_CT_SIZE_CTR		3
+#define AES_CT_SIZE_GCM_OUT	5
+#define AES_CT_SIZE_GCM_IN	6
 #define AES_CT_CTRL_HDR		cpu_to_le32(0x00220000)
 
 /* AES-CBC/ECB/CTR command token */
 #define AES_CMD0		cpu_to_le32(0x05000000)
 #define AES_CMD1		cpu_to_le32(0x2d060000)
 #define AES_CMD2		cpu_to_le32(0xe4a63806)
+/* AES-GCM command token */
+#define AES_GCM_CMD0		cpu_to_le32(0x0b000000)
+#define AES_GCM_CMD1		cpu_to_le32(0xa0800000)
+#define AES_GCM_CMD2		cpu_to_le32(0x25000010)
+#define AES_GCM_CMD3		cpu_to_le32(0x0f020000)
+#define AES_GCM_CMD4		cpu_to_le32(0x21e60000)
+#define AES_GCM_CMD5		cpu_to_le32(0x40e60000)
+#define AES_GCM_CMD6		cpu_to_le32(0xd0070000)
 
 /* AES transform information word 0 fields */
 #define AES_TFM_BASIC_OUT	cpu_to_le32(0x4 << 0)
 #define AES_TFM_BASIC_IN	cpu_to_le32(0x5 << 0)
+#define AES_TFM_GCM_OUT		cpu_to_le32(0x6 << 0)
+#define AES_TFM_GCM_IN		cpu_to_le32(0xf << 0)
 #define AES_TFM_SIZE(x)		cpu_to_le32((x) << 8)
 #define AES_TFM_128BITS		cpu_to_le32(0xb << 16)
 #define AES_TFM_192BITS		cpu_to_le32(0xd << 16)
@@ -41,15 +53,22 @@
 /* AES transform information word 1 fields */
 #define AES_TFM_ECB		cpu_to_le32(0x0 << 0)
 #define AES_TFM_CBC		cpu_to_le32(0x1 << 0)
+#define AES_TFM_CTR_INIT	cpu_to_le32(0x2 << 0)	/* init counter to 1 */
 #define AES_TFM_CTR_LOAD	cpu_to_le32(0x6 << 0)	/* load/reuse counter */
+#define AES_TFM_3IV		cpu_to_le32(0x7 << 5)	/* using IV 0-2 */
 #define AES_TFM_FULL_IV		cpu_to_le32(0xf << 5)	/* using IV 0-3 */
+#define AES_TFM_IV_CTR_MODE	cpu_to_le32(0x1 << 10)
+#define AES_TFM_ENC_HASH	cpu_to_le32(0x1 << 17)
+#define AES_TFM_GHASH_DIG	cpu_to_le32(0x2 << 21)
+#define AES_TFM_GHASH		cpu_to_le32(0x4 << 23)
 
 /* AES flags */
 #define AES_FLAGS_ECB		BIT(0)
 #define AES_FLAGS_CBC		BIT(1)
 #define AES_FLAGS_CTR		BIT(2)
-#define AES_FLAGS_ENCRYPT	BIT(3)
-#define AES_FLAGS_BUSY		BIT(4)
+#define AES_FLAGS_GCM		BIT(3)
+#define AES_FLAGS_ENCRYPT	BIT(4)
+#define AES_FLAGS_BUSY		BIT(5)
 
 /**
  * Command token(CT) is a set of hardware instructions that
@@ -62,14 +81,23 @@
  * - Commands decoding and control of the engine's data path.
  * - Coordinating hardware data fetch and store operations.
  * - Result token construction and output.
+ *
+ * Memory map of GCM's TFM:
+ * /-----------\
+ * |  AES KEY  | 128/196/256 bits
+ * |-----------|
+ * |  HASH KEY | a string 128 zero bits encrypted using the block cipher
+ * |-----------|
+ * |    IVs    | 4 * 4 bytes
+ * \-----------/
  */
 struct mtk_aes_ct {
-	__le32 cmd[AES_CT_SIZE_CBC];
+	__le32 cmd[AES_CT_SIZE_GCM_IN];
 };
 
 struct mtk_aes_tfm {
 	__le32 ctrl[2];
-	__le32 state[SIZE_IN_WORDS(AES_KEYSIZE_256 + AES_BLOCK_SIZE)];
+	__le32 state[SIZE_IN_WORDS(AES_KEYSIZE_256 + AES_BLOCK_SIZE * 2)];
 };
 
 struct mtk_aes_reqctx {
@@ -103,6 +131,20 @@ struct mtk_aes_ctr_ctx {
 	struct scatterlist dst[2];
 };
 
+struct mtk_aes_gcm_ctx {
+	struct mtk_aes_base_ctx base;
+
+	u32 authsize;
+	size_t textlen;
+
+	struct crypto_skcipher *ctr;
+};
+
+struct mtk_aes_gcm_setkey_result {
+	int err;
+	struct completion completion;
+};
+
 struct mtk_aes_drv {
 	struct list_head dev_list;
 	/* Device list lock */
@@ -251,6 +293,10 @@ static int mtk_aes_xmit(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
 	}
 	res->hdr |= MTK_DESC_LAST;
 
+	/* Prepare enough space for authenticated tag */
+	if (aes->flags & AES_FLAGS_GCM)
+		res->hdr += AES_BLOCK_SIZE;
+
 	/*
 	 * Make sure that all changes to the DMA ring are done before we
 	 * start engine.
@@ -737,6 +783,315 @@ static int mtk_aes_ctr_cra_init(struct crypto_tfm *tfm)
 },
 };
 
+static inline struct mtk_aes_gcm_ctx *
+mtk_aes_gcm_ctx_cast(struct mtk_aes_base_ctx *ctx)
+{
+	return container_of(ctx, struct mtk_aes_gcm_ctx, base);
+}
+
+/* Initialize transform information of GCM mode */
+static void mtk_aes_gcm_info_init(struct mtk_cryp *cryp,
+				  struct mtk_aes_rec *aes,
+				  size_t len)
+{
+	struct aead_request *req = aead_request_cast(aes->areq);
+	struct mtk_aes_base_ctx *ctx = aes->ctx;
+	struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx);
+	const u32 *iv = (const u32 *)req->iv;
+	u32 *iv_state = ctx->tfm.state + ctx->keylen +
+			SIZE_IN_WORDS(AES_BLOCK_SIZE);
+	u32 ivsize = crypto_aead_ivsize(crypto_aead_reqtfm(req));
+	int i;
+
+	ctx->ct_hdr = AES_CT_CTRL_HDR | len;
+
+	ctx->ct.cmd[0] = AES_GCM_CMD0 | cpu_to_le32(req->assoclen);
+	ctx->ct.cmd[1] = AES_GCM_CMD1 | cpu_to_le32(req->assoclen);
+	ctx->ct.cmd[2] = AES_GCM_CMD2;
+	ctx->ct.cmd[3] = AES_GCM_CMD3 | cpu_to_le32(gctx->textlen);
+
+	if (aes->flags & AES_FLAGS_ENCRYPT) {
+		ctx->ct.cmd[4] = AES_GCM_CMD4 | cpu_to_le32(gctx->authsize);
+		ctx->ct_size = AES_CT_SIZE_GCM_OUT;
+		ctx->tfm.ctrl[0] = AES_TFM_GCM_OUT;
+	} else {
+		ctx->ct.cmd[4] = AES_GCM_CMD5 | cpu_to_le32(gctx->authsize);
+		ctx->ct.cmd[5] = AES_GCM_CMD6 | cpu_to_le32(gctx->authsize);
+		ctx->ct_size = AES_CT_SIZE_GCM_IN;
+		ctx->tfm.ctrl[0] = AES_TFM_GCM_IN;
+	}
+
+	if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_128))
+		ctx->tfm.ctrl[0] |= AES_TFM_128BITS;
+	else if (ctx->keylen == SIZE_IN_WORDS(AES_KEYSIZE_256))
+		ctx->tfm.ctrl[0] |= AES_TFM_256BITS;
+	else
+		ctx->tfm.ctrl[0] |= AES_TFM_192BITS;
+
+	ctx->tfm.ctrl[0] |= AES_TFM_GHASH_DIG | AES_TFM_GHASH |
+			    AES_TFM_SIZE(ctx->keylen + SIZE_IN_WORDS(
+			    AES_BLOCK_SIZE + ivsize));
+	ctx->tfm.ctrl[1] = AES_TFM_CTR_INIT | AES_TFM_IV_CTR_MODE |
+			   AES_TFM_3IV | AES_TFM_ENC_HASH;
+
+	for (i = 0; i < SIZE_IN_WORDS(ivsize); i++)
+		iv_state[i] = cpu_to_le32(iv[i]);
+}
+
+static int mtk_aes_gcm_dma(struct mtk_cryp *cryp, struct mtk_aes_rec *aes,
+			   struct scatterlist *src, struct scatterlist *dst,
+			   size_t len)
+{
+	bool src_aligned, dst_aligned;
+
+	aes->src.sg = src;
+	aes->dst.sg = dst;
+	aes->real_dst = dst;
+
+	src_aligned = mtk_aes_check_aligned(src, len, &aes->src);
+	if (src == dst)
+		dst_aligned = src_aligned;
+	else
+		dst_aligned = mtk_aes_check_aligned(dst, len, &aes->dst);
+
+	if (!src_aligned || !dst_aligned) {
+		if (aes->total > AES_BUF_SIZE)
+			return -ENOMEM;
+
+		if (!src_aligned) {
+			sg_copy_to_buffer(src, sg_nents(src), aes->buf, len);
+			aes->src.sg = &aes->aligned_sg;
+			aes->src.nents = 1;
+			aes->src.remainder = 0;
+		}
+
+		if (!dst_aligned) {
+			aes->dst.sg = &aes->aligned_sg;
+			aes->dst.nents = 1;
+			aes->dst.remainder = 0;
+		}
+
+		sg_init_table(&aes->aligned_sg, 1);
+		sg_set_buf(&aes->aligned_sg, aes->buf, aes->total);
+	}
+
+	mtk_aes_gcm_info_init(cryp, aes, len);
+
+	return mtk_aes_map(cryp, aes);
+}
+
+/* Todo: GMAC */
+static int mtk_aes_gcm_start(struct mtk_cryp *cryp, struct mtk_aes_rec *aes)
+{
+	struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(aes->ctx);
+	struct aead_request *req = aead_request_cast(aes->areq);
+	struct mtk_aes_reqctx *rctx = aead_request_ctx(req);
+	u32 len = req->assoclen + req->cryptlen;
+
+	mtk_aes_set_mode(aes, rctx);
+
+	if (aes->flags & AES_FLAGS_ENCRYPT) {
+		u32 tag[4];
+		/* Compute total process length. */
+		aes->total = len + gctx->authsize;
+		/* Compute text length. */
+		gctx->textlen = req->cryptlen;
+		/* Hardware will append authenticated tag to output buffer */
+		scatterwalk_map_and_copy(tag, req->dst, len, gctx->authsize, 1);
+	} else {
+		aes->total = len;
+		gctx->textlen = req->cryptlen - gctx->authsize;
+	}
+	aes->resume = mtk_aes_complete;
+
+	return mtk_aes_gcm_dma(cryp, aes, req->src, req->dst, len);
+}
+
+static int mtk_aes_gcm_crypt(struct aead_request *req, u64 mode)
+{
+	struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct mtk_aes_reqctx *rctx = aead_request_ctx(req);
+
+	rctx->mode = AES_FLAGS_GCM | mode;
+
+	return mtk_aes_handle_queue(ctx->cryp, !!(mode & AES_FLAGS_ENCRYPT),
+								&req->base);
+}
+
+static void mtk_gcm_setkey_done(struct crypto_async_request *req, int err)
+{
+	struct mtk_aes_gcm_setkey_result *result = req->data;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	result->err = err;
+	complete(&result->completion);
+}
+
+/*
+ * Because of the hardware limitation, we need to pre-calculate key(H)
+ * for the GHASH operation. The result of the encryption operation
+ * need to be stored in the transform state buffer.
+ */
+static int mtk_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
+			      u32 keylen)
+{
+	struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead);
+	struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx);
+	struct crypto_skcipher *ctr = gctx->ctr;
+	struct {
+		u32 hash[4];
+		u8 iv[8];
+
+		struct mtk_aes_gcm_setkey_result result;
+
+		struct scatterlist sg[1];
+		struct skcipher_request req;
+	} *data;
+	const u32 *aes_key;
+	u32 *key_state, *hash_state;
+	int err, i;
+
+	if (keylen != AES_KEYSIZE_256 &&
+	    keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_128) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	key_state = ctx->tfm.state;
+	aes_key = (u32 *)key;
+	ctx->keylen = SIZE_IN_WORDS(keylen);
+
+	for (i = 0; i < ctx->keylen; i++)
+		ctx->tfm.state[i] = cpu_to_le32(aes_key[i]);
+
+	/* Same as crypto_gcm_setkey() from crypto/gcm.c */
+	crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) &
+				  CRYPTO_TFM_REQ_MASK);
+	err = crypto_skcipher_setkey(ctr, key, keylen);
+	crypto_aead_set_flags(aead, crypto_skcipher_get_flags(ctr) &
+			      CRYPTO_TFM_RES_MASK);
+	if (err)
+		return err;
+
+	data = kzalloc(sizeof(*data) + crypto_skcipher_reqsize(ctr),
+		       GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	init_completion(&data->result.completion);
+	sg_init_one(data->sg, &data->hash, AES_BLOCK_SIZE);
+	skcipher_request_set_tfm(&data->req, ctr);
+	skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP |
+				      CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      mtk_gcm_setkey_done, &data->result);
+	skcipher_request_set_crypt(&data->req, data->sg, data->sg,
+				   AES_BLOCK_SIZE, data->iv);
+
+	err = crypto_skcipher_encrypt(&data->req);
+	if (err == -EINPROGRESS || err == -EBUSY) {
+		err = wait_for_completion_interruptible(
+			&data->result.completion);
+		if (!err)
+			err = data->result.err;
+	}
+	if (err)
+		goto out;
+
+	hash_state = key_state + ctx->keylen;
+
+	for (i = 0; i < 4; i++)
+		hash_state[i] = cpu_to_be32(data->hash[i]);
+out:
+	kzfree(data);
+	return err;
+}
+
+static int mtk_aes_gcm_setauthsize(struct crypto_aead *aead,
+				   u32 authsize)
+{
+	struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(aead);
+	struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx);
+
+	/* Same as crypto_gcm_authsize() from crypto/gcm.c */
+	switch (authsize) {
+	case 8:
+	case 12:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	gctx->authsize = authsize;
+	return 0;
+}
+
+static int mtk_aes_gcm_encrypt(struct aead_request *req)
+{
+	return mtk_aes_gcm_crypt(req, AES_FLAGS_ENCRYPT);
+}
+
+static int mtk_aes_gcm_decrypt(struct aead_request *req)
+{
+	return mtk_aes_gcm_crypt(req, 0);
+}
+
+static int mtk_aes_gcm_init(struct crypto_aead *aead)
+{
+	struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead);
+	struct mtk_cryp *cryp = NULL;
+
+	cryp = mtk_aes_find_dev(&ctx->base);
+	if (!cryp) {
+		pr_err("can't find crypto device\n");
+		return -ENODEV;
+	}
+
+	ctx->ctr = crypto_alloc_skcipher("ctr(aes)", 0,
+					 CRYPTO_ALG_ASYNC);
+	if (IS_ERR(ctx->ctr)) {
+		pr_err("Error allocating ctr(aes)\n");
+		return PTR_ERR(ctx->ctr);
+	}
+
+	crypto_aead_set_reqsize(aead, sizeof(struct mtk_aes_reqctx));
+	ctx->base.start = mtk_aes_gcm_start;
+	return 0;
+}
+
+static void mtk_aes_gcm_exit(struct crypto_aead *aead)
+{
+	struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead);
+
+	crypto_free_skcipher(ctx->ctr);
+}
+
+static struct aead_alg aes_gcm_alg = {
+	.setkey		= mtk_aes_gcm_setkey,
+	.setauthsize	= mtk_aes_gcm_setauthsize,
+	.encrypt	= mtk_aes_gcm_encrypt,
+	.decrypt	= mtk_aes_gcm_decrypt,
+	.init		= mtk_aes_gcm_init,
+	.exit		= mtk_aes_gcm_exit,
+	.ivsize		= 12,
+	.maxauthsize	= AES_BLOCK_SIZE,
+
+	.base = {
+		.cra_name		= "gcm(aes)",
+		.cra_driver_name	= "gcm-aes-mtk",
+		.cra_priority		= 400,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct mtk_aes_gcm_ctx),
+		.cra_alignmask		= 0xf,
+		.cra_module		= THIS_MODULE,
+	},
+};
+
 static void mtk_aes_enc_task(unsigned long data)
 {
 	struct mtk_cryp *cryp = (struct mtk_cryp *)data;
@@ -851,6 +1206,8 @@ static void mtk_aes_unregister_algs(void)
 {
 	int i;
 
+	crypto_unregister_aead(&aes_gcm_alg);
+
 	for (i = 0; i < ARRAY_SIZE(aes_algs); i++)
 		crypto_unregister_alg(&aes_algs[i]);
 }
@@ -865,6 +1222,10 @@ static int mtk_aes_register_algs(void)
 			goto err_aes_algs;
 	}
 
+	err = crypto_register_aead(&aes_gcm_alg);
+	if (err)
+		goto err_aes_algs;
+
 	return 0;
 
 err_aes_algs:
diff --git a/drivers/crypto/mediatek/mtk-platform.h b/drivers/crypto/mediatek/mtk-platform.h
index 7cd5f98..ed6d871 100644
--- a/drivers/crypto/mediatek/mtk-platform.h
+++ b/drivers/crypto/mediatek/mtk-platform.h
@@ -13,8 +13,10 @@
 #define __MTK_PLATFORM_H_
 
 #include <crypto/algapi.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
+#include <crypto/skcipher.h>
 #include <linux/crypto.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
-- 
1.9.1

^ permalink raw reply related

* [PATCH] crypto: vmx -- disable preemption to enable vsx in aes_ctr.c
From: Li Zhong @ 2017-01-20  8:35 UTC (permalink / raw)
  To: linux-crypto; +Cc: leosilva, pfsmorigo, herbert

Some preemptible check warnings were reported from enable_kernel_vsx(). This
patch disables preemption in aes_ctr.c before enabling vsx, and they are now 
consistent with other files in the same directory.

Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
---
 drivers/crypto/vmx/aes_ctr.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
index 38ed10d..7cf6d31 100644
--- a/drivers/crypto/vmx/aes_ctr.c
+++ b/drivers/crypto/vmx/aes_ctr.c
@@ -80,11 +80,13 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key,
 	int ret;
 	struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm);
 
+	preempt_disable();
 	pagefault_disable();
 	enable_kernel_vsx();
 	ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
 	disable_kernel_vsx();
 	pagefault_enable();
+	preempt_enable();
 
 	ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen);
 	return ret;
@@ -99,11 +101,13 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx,
 	u8 *dst = walk->dst.virt.addr;
 	unsigned int nbytes = walk->nbytes;
 
+	preempt_disable();
 	pagefault_disable();
 	enable_kernel_vsx();
 	aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key);
 	disable_kernel_vsx();
 	pagefault_enable();
+	preempt_enable();
 
 	crypto_xor(keystream, src, nbytes);
 	memcpy(dst, keystream, nbytes);
@@ -132,6 +136,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc,
 		blkcipher_walk_init(&walk, dst, src, nbytes);
 		ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
 		while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+			preempt_disable();
 			pagefault_disable();
 			enable_kernel_vsx();
 			aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr,
@@ -143,6 +148,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc,
 						    walk.iv);
 			disable_kernel_vsx();
 			pagefault_enable();
+			preempt_enable();
 
 			/* We need to update IV mostly for last bytes/round */
 			inc = (nbytes & AES_BLOCK_MASK) / AES_BLOCK_SIZE;

^ permalink raw reply related

* Re: [PATCH 0/2] Introduce AMD Secure Processor device
From: Greg KH @ 2017-01-20  8:45 UTC (permalink / raw)
  To: Brijesh Singh
  Cc: thomas.lendacky, herbert, arnd, lambert.quentin, gary.hook,
	linux-kernel, Julia.Lawall, weiyongjun1, linux-crypto,
	umgwanakikbuti
In-Reply-To: <0442f536-221d-fcef-3009-4bc07403ccd8@amd.com>

On Thu, Jan 19, 2017 at 02:03:12PM -0600, Brijesh Singh wrote:
> Hi Greg,
> 
> On 01/19/2017 12:21 PM, Greg KH wrote:
> > On Thu, Jan 19, 2017 at 01:07:50PM -0500, Brijesh Singh wrote:
> > > The CCP device (drivers/crypto/ccp/ccp.ko) is part of AMD Secure Processor,
> > > which is not dedicated solely to crypto. The AMD Secure Processor includes
> > > CCP and PSP (Platform Secure Processor) devices.
> > > 
> > > This patch series moves the CCP device driver to the misc directory and
> > > creates a framework that allows functional component of the AMD Secure
> > > Processor to be initialized and handled appropriately.
> > 
> > Why the misc directory?  I don't see the justification here...
> > 
> 
> Since this driver is not solely for crypto purposes and do not fit in any of
> the standard categories hence I thought of moving it into misc directory. I
> am open to other suggestions unless Herbert is ok with leaving it into
> crypto and allowing the addition of the Secure Processor support.
> 
> The patch series allows the CCP driver to support other Secure Processor
> functions, e.g Secure Encrypted Virtualization (SEV) key management. In
> past, I tried to add SEV support into existing CCP driver [1] but we quickly
> learned that CCP driver should be moved outside the crypto directory
> otherwise will end up adding non crypto code into drivers/crypto directory.
> Once this cleanup is accepted then I can work to add SEV support inside the
> CCP driver.
> 
> [1] http://marc.info/?l=linux-kernel&m=147204118426151&w=2

Ok, what type of interface will this driver have with userspace and/or
other parts of the kernel?  Is there a misc char device burried in there
somewhere (I couldn't find it in the big diff sent out), or is this
driver just creating specific apis that other parts of the kernel will
call if available?

I think we need to see more code here, broken up into sane and
reviewable pieces, before we could either say "No don't do that!" or
"sure, let me go merge these!" :)

thanks,

greg k-h

^ permalink raw reply

* [PATCH 6/6] crypto: tcrypt: add ECDSA test modes
From: Nitin Kumbhar @ 2017-01-20 11:36 UTC (permalink / raw)
  To: herbert, davem; +Cc: linux-crypto, Nitin Kumbhar
In-Reply-To: <1484912161-5932-1-git-send-email-nkumbhar@nvidia.com>

Update tcrypt module to include a new ECDSA test
modes. It includes:

tcrypt.ko mode=560 for ECDSA sign/verify validation.
tcrypt.ko mode=561 for ECDSA sign/verify op perf in cycles.
tcrypt.ko mode=561 sec=<seconds> for number of ECDSA sign
  verify ops in given time.

Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
---
 crypto/tcrypt.c |  250 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 crypto/tcrypt.h |  122 +++++++++++++++++++++++++++
 2 files changed, 368 insertions(+), 4 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index ae22f05d5936..0e4547fe23a8 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -7,6 +7,7 @@
  * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
  * Copyright (c) 2002 Jean-Francois Dive <jef@linuxbe.org>
  * Copyright (c) 2007 Nokia Siemens Networks
+ * Copyright (c) 2017 NVIDIA Corporation
  *
  * Updated RFC4106 AES-GCM testing.
  *    Authors: Aidan O'Mahony (aidan.o.mahony@intel.com)
@@ -25,6 +26,7 @@
 #include <crypto/aead.h>
 #include <crypto/hash.h>
 #include <crypto/skcipher.h>
+#include <crypto/akcipher.h>
 #include <linux/err.h>
 #include <linux/fips.h>
 #include <linux/init.h>
@@ -44,10 +46,12 @@
 #define TVMEMSIZE	4
 
 /*
-* Used by test_cipher_speed()
-*/
-#define ENCRYPT 1
-#define DECRYPT 0
+ * Used by test_cipher_speed()
+ */
+#define DECRYPT		0
+#define ENCRYPT		1
+#define SIGN		2
+#define VERIFY		3
 
 #define MAX_DIGEST_SIZE		64
 
@@ -994,6 +998,223 @@ static void test_cipher_speed(const char *algo, int enc, unsigned int secs,
 				   false);
 }
 
+static inline int do_one_akcipher_op(struct akcipher_request *r, int ret)
+{
+	if (ret == -EINPROGRESS || ret == -EBUSY) {
+		struct tcrypt_result *tr = r->base.data;
+
+		wait_for_completion(&tr->completion);
+		reinit_completion(&tr->completion);
+		ret = tr->err;
+	}
+	return ret;
+}
+
+static int test_akcipher_jiffies(struct akcipher_request *r, int op, int secs)
+{
+	unsigned long start, end;
+	int count, ret;
+
+	for (start = jiffies, end = start + secs * HZ, count = 0;
+	     time_before(jiffies, end); count++) {
+
+		switch (op) {
+		case SIGN:
+			ret = do_one_akcipher_op(r, crypto_akcipher_sign(r));
+			break;
+		case VERIFY:
+			ret = do_one_akcipher_op(r, crypto_akcipher_verify(r));
+			break;
+		default:
+			ret = -EINVAL;
+			break;
+		}
+		if (ret)
+			return ret;
+	}
+
+	pr_info("%d operations in %d seconds\n", count, secs);
+	return 0;
+}
+
+static int test_akcipher_cycles(struct akcipher_request *r, int op)
+{
+	unsigned long cycles = 0;
+	int ret = 0;
+	int i;
+
+	/* Warm-up run. */
+	for (i = 0; i < 4; i++) {
+		switch (op) {
+		case SIGN:
+			ret = do_one_akcipher_op(r, crypto_akcipher_sign(r));
+			break;
+		case VERIFY:
+			ret = do_one_akcipher_op(r, crypto_akcipher_verify(r));
+			break;
+		default:
+			ret = -EINVAL;
+			break;
+		}
+		if (ret)
+			goto out;
+	}
+
+	/* The real thing. */
+	for (i = 0; i < 8; i++) {
+		cycles_t start, end;
+
+		start = get_cycles();
+		switch (op) {
+		case SIGN:
+			ret = do_one_akcipher_op(r, crypto_akcipher_sign(r));
+			break;
+		case VERIFY:
+			ret = do_one_akcipher_op(r, crypto_akcipher_verify(r));
+			break;
+		default:
+			ret = -EINVAL;
+			break;
+		}
+		end = get_cycles();
+
+		if (ret)
+			goto out;
+
+		cycles += end - start;
+	}
+out:
+	if (ret == 0)
+		pr_info("1 operation in %lu cycles\n", (cycles + 4) / 8);
+
+	return ret;
+}
+
+static void test_akcipher_speed(const char *algo, int op, unsigned int secs,
+				struct akcipher_speed_template *template,
+				unsigned int tcount, u8 *keysize)
+{
+	unsigned int ret, i, j;
+	struct tcrypt_result tresult;
+	const char *key;
+	struct akcipher_request *req;
+	struct crypto_akcipher *tfm;
+	unsigned int m_size = 0;
+	unsigned int nbytes = 0;
+	const char *o;
+
+	if (op == SIGN)
+		o = "sign";
+	else if (op == VERIFY)
+		o = "verify";
+	else
+		return;
+
+	tfm = crypto_alloc_akcipher(algo, 0, 0);
+	if (IS_ERR(tfm)) {
+		pr_err("failed to load transform for %s: %ld\n", algo,
+		       PTR_ERR(tfm));
+		return;
+	}
+
+	req = akcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req) {
+		pr_err("tcrypt: akcipher: Failed to allocate request for %s\n",
+		       algo);
+		goto out;
+	}
+
+	init_completion(&tresult.completion);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      tcrypt_complete, &tresult);
+
+	i = 0;
+	do {
+		struct scatterlist sg[TVMEMSIZE];
+
+		memset(tvmem[0], 0xff, PAGE_SIZE);
+
+		/* set key */
+		key = tvmem[0];
+		for (j = 0; j < tcount; j++) {
+			if (template[j].key_len == *keysize) {
+				key = template[j].key;
+				break;
+			}
+		}
+
+		ret = crypto_akcipher_set_pub_key(tfm, key, *keysize);
+		if (ret) {
+			pr_err("set_pub_key() failed\n");
+			goto out_free_req;
+		}
+
+		ret = crypto_akcipher_set_priv_key(tfm, key, *keysize);
+		if (ret) {
+			pr_err("set_priv_key() failed\n");
+			goto out_free_req;
+		}
+
+		/* set up src/dst buffs */
+		sg_init_table(sg, TVMEMSIZE);
+		if (op == SIGN) {
+			m_size = template[j].m_size;
+			nbytes = template[j].c_size / 3;
+
+			memcpy(tvmem[0], template[j].m, m_size);
+
+			sg_set_buf(&sg[0], tvmem[0], m_size);
+			akcipher_request_set_crypt(req, sg, sg,
+						   m_size, PAGE_SIZE);
+		} else if (op == VERIFY) {
+			m_size = template[j].m_size;
+			nbytes = template[j].c_size / 3;
+
+			memcpy(tvmem[0], template[j].m, m_size);
+			memcpy(tvmem[1], (u8 *)(template[j].c) + nbytes,
+			       nbytes);
+			memcpy(tvmem[2], (u8 *)(template[j].c) + 2 * nbytes,
+			       nbytes);
+
+			sg_set_buf(&sg[0], tvmem[0], m_size);
+			sg_set_buf(&sg[1], tvmem[1], nbytes);
+			sg_set_buf(&sg[2], tvmem[2], nbytes);
+
+			akcipher_request_set_crypt(req, sg, sg,
+						   m_size + 2 * nbytes,
+						   PAGE_SIZE);
+		} else {
+			pr_err("invalid op\n");
+			ret = -EINVAL;
+			goto out_free_req;
+		}
+
+
+		pr_info("\ntesting speed of %s (%s) %s with keysize %d\n",
+			algo, get_driver_name(crypto_akcipher, tfm), o,
+			nbytes * 8);
+
+		if (secs)
+			ret = test_akcipher_jiffies(req, op, secs);
+		else
+			ret = test_akcipher_cycles(req, op);
+
+		if (ret) {
+			pr_err("%s() failed\n", o);
+			break;
+		}
+
+		i++;
+		keysize++;
+
+	} while (*keysize);
+
+out_free_req:
+	akcipher_request_free(req);
+out:
+	crypto_free_akcipher(tfm);
+}
+
 static void test_available(void)
 {
 	char **name = check;
@@ -2035,6 +2256,27 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 				   speed_template_8_32);
 		break;
 
+	case 560:
+		ret += tcrypt_test("ecdsa");
+		break;
+
+	case 561:
+#ifndef CONFIG_CRYPTO_FIPS
+		test_akcipher_speed("ecdsa", SIGN, sec,
+				    ecdsa_speed_template, ECDSA_SPEED_VECTORS,
+				    akc_speed_template_P192);
+		test_akcipher_speed("ecdsa", VERIFY, sec,
+				    ecdsa_speed_template, ECDSA_SPEED_VECTORS,
+				    akc_speed_template_P192);
+#endif
+		test_akcipher_speed("ecdsa", SIGN, sec,
+				    ecdsa_speed_template, ECDSA_SPEED_VECTORS,
+				    akc_speed_template_P256);
+		test_akcipher_speed("ecdsa", VERIFY, sec,
+				    ecdsa_speed_template, ECDSA_SPEED_VECTORS,
+				    akc_speed_template_P256);
+		break;
+
 	case 1000:
 		test_available();
 		break;
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index f0bfee1bb293..bd6a4b1cbcbe 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -7,6 +7,7 @@
  * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
  * Copyright (c) 2002 Jean-Francois Dive <jef@linuxbe.org>
  * Copyright (c) 2007 Nokia Siemens Networks
+ * Copyright (c) 2017 NVIDIA Corporation
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -17,6 +18,16 @@
 #ifndef _CRYPTO_TCRYPT_H
 #define _CRYPTO_TCRYPT_H
 
+struct akcipher_speed_template {
+	unsigned char *key;
+	unsigned char *m;
+	unsigned char *c;
+	unsigned int key_len;
+	unsigned int m_size;
+	unsigned int c_size;
+	bool public_key_vec;
+};
+
 struct cipher_speed_template {
 	const char *key;
 	unsigned int klen;
@@ -48,6 +59,117 @@ struct hash_speed {
 };
 
 /*
+ * ECDSA test vectors.
+ */
+#ifdef CONFIG_CRYPTO_FIPS
+#define ECDSA_SPEED_VECTORS	1
+#else
+#define ECDSA_SPEED_VECTORS	2
+#endif
+
+static struct akcipher_speed_template ecdsa_speed_template[] = {
+	{
+#ifndef CONFIG_CRYPTO_FIPS
+		/* [P-192,SHA-256] */
+		.m =
+		/* Msg / Hash */
+		"\xd0\xd8\xc0\x99\xe0\xe2\xf7\xf8"
+		"\x87\xe1\x6d\x11\xe1\xcc\x20\x43"
+		"\xaf\xc0\x80\xdb\x47\x72\xfa\xe3"
+		"\x95\xe5\xd1\x34\x7d\x31\xe8\x5a",
+		.m_size = 32,
+		.key =
+		/* version */
+		"\x01"
+		/* curve_id */
+		"\x01"
+		/* d */
+		"\x47\x7a\xf2\x5c\x86\xef\x09\x08"
+		"\xa4\x9a\x47\x53\x06\xfc\x61\xbc"
+		"\xa5\x6f\xdd\x7d\x2f\xd2\xed\x24"
+		/* Qx */
+		"\xdc\x14\xd4\xd8\x2e\x1e\x25\x2f"
+		"\x66\x28\xaa\x80\xbc\x38\x6a\x07"
+		"\x8a\x70\xb7\x74\x71\x2d\xf1\x9b"
+		/* Qy */
+		"\x98\x34\x57\x11\xb0\xdc\x3d\xff"
+		"\xfc\xdc\xfe\xa2\x1c\x47\x9e\x4e"
+		"\x82\x08\xfc\x7d\xd0\xc8\x54\x48",
+		.key_len = 74,
+		.c =
+		/* k */
+		"\x3e\x70\xc7\x86\xaf\xaa\x71\x7c"
+		"\x68\x96\xc5\xc3\xec\xb8\x29\xa3"
+		"\xfa\xf7\xa5\x36\xa2\x17\xc8\xa5"
+		/* R */
+		"\xf8\xef\x13\xa8\x86\xe6\x73\x85"
+		"\xdf\x2e\x88\x99\x91\x9b\xc2\x90"
+		"\xea\x1f\x36\xf4\xec\xba\x4a\x35"
+		/* S */
+		"\xc1\x82\x9e\x94\xb7\x58\x2c\x63"
+		"\x8e\xd7\x15\x5a\x38\x47\x30\x9b"
+		"\x1c\x11\x86\xac\x00\x00\xf5\x80",
+		.c_size = 72,
+	}, {
+#endif
+		/* [P-256,SHA-256] */
+		.m =
+		/* Msg / Hash */
+		"\x56\xec\x33\xa1\xa6\xe7\xc4\xdb"
+		"\x77\x03\x90\x1a\xfb\x2e\x1e\x4e"
+		"\x50\x09\xfe\x04\x72\x89\xc5\xc2"
+		"\x42\x13\x6c\xe3\xb7\xf6\xac\x44",
+		.m_size = 32,
+		.key =
+		/* version */
+		"\x01"
+		/* curve_id */
+		"\x02"
+		/* d */
+		"\x64\xb4\x72\xda\x6d\xa5\x54\xca"
+		"\xac\x3e\x4e\x0b\x13\xc8\x44\x5b"
+		"\x1a\x77\xf4\x59\xee\xa8\x4f\x1f"
+		"\x58\x8b\x5f\x71\x3d\x42\x9b\x51"
+		/* Qx */
+		"\x83\xbf\x71\xc2\x46\xff\x59\x3c"
+		"\x2f\xb1\xbf\x4b\xe9\x5d\x56\xd3"
+		"\xcc\x8f\xdb\x48\xa2\xbf\x33\xf0"
+		"\xf4\xc7\x5f\x07\x1c\xe9\xcb\x1c"
+		/* Qy */
+		"\xa9\x4c\x9a\xa8\x5c\xcd\x7c\xdc"
+		"\x78\x4e\x40\xb7\x93\xca\xb7\x6d"
+		"\xe0\x13\x61\x0e\x2c\xdb\x1f\x1a"
+		"\xa2\xf9\x11\x88\xc6\x14\x40\xce",
+		.key_len = 98,
+		.c =
+		/* k */
+		"\xde\x68\x2a\x64\x87\x07\x67\xb9"
+		"\x33\x5d\x4f\x82\x47\x62\x4a\x3b"
+		"\x7f\x3c\xe9\xf9\x45\xf2\x80\xa2"
+		"\x61\x6a\x90\x4b\xb1\xbb\xa1\x94"
+		/* R */
+		"\xac\xc2\xc8\x79\x6f\x5e\xbb\xca"
+		"\x7a\x5a\x55\x6a\x1f\x6b\xfd\x2a"
+		"\xed\x27\x95\x62\xd6\xe3\x43\x88"
+		"\x5b\x79\x14\xb5\x61\x80\xac\xf3"
+		/* S */
+		"\x03\x89\x05\xcc\x2a\xda\xcd\x3c"
+		"\x5a\x17\x6f\xe9\x18\xb2\x97\xef"
+		"\x1c\x37\xf7\x2b\x26\x76\x6c\x78"
+		"\xb2\xa6\x05\xca\x19\x78\xf7\x8b",
+		.c_size = 96,
+	},
+};
+
+/*
+ * AKCipher speed tests
+ */
+#ifndef CONFIG_CRYPTO_FIPS
+static u8 akc_speed_template_P192[] = {74, 0};
+#endif
+static u8 akc_speed_template_P256[] = {98, 0};
+
+/*
  * Cipher speed tests
  */
 static u8 speed_template_8[] = {8, 0};
-- 
1.7.6.3

^ permalink raw reply related

* [PATCH 2/6] crypto: ecc: add vli and ecc ops
From: Nitin Kumbhar @ 2017-01-20 11:35 UTC (permalink / raw)
  To: herbert, davem; +Cc: linux-crypto, Nitin Kumbhar
In-Reply-To: <1484912161-5932-1-git-send-email-nkumbhar@nvidia.com>

Add functions to copy vli from buffers, to print vli in
big endian format, for vli mod and mod multiplication ops
and ecc point addition.

Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
---
 crypto/ecc.c |  168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 168 insertions(+), 0 deletions(-)

diff --git a/crypto/ecc.c b/crypto/ecc.c
index a8c10e725138..6ad785c4c12a 100644
--- a/crypto/ecc.c
+++ b/crypto/ecc.c
@@ -208,6 +208,42 @@ static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
 		dest[i] = src[i];
 }
 
+/* Copy from vli to buf.
+ * For buffers smaller than vli: copy only LSB nbytes from vli.
+ * For buffers larger than vli : fill up remaining buf with zeroes.
+ */
+void vli_copy_to_buf(u8 *dst_buf, unsigned int buf_len,
+		     const u64 *src_vli, unsigned int ndigits)
+{
+	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	u8 *vli = (u8 *)src_vli;
+	int i;
+
+	for (i = 0; i < buf_len && i < nbytes; i++)
+		dst_buf[i] = vli[i];
+
+	for (; i < buf_len; i++)
+		dst_buf[i] = 0;
+}
+
+/* Copy from buffer to vli.
+ * For buffers smaller than vli: fill up remaining vli with zeroes.
+ * For buffers larger than vli : copy only LSB nbytes to vli.
+ */
+void vli_copy_from_buf(u64 *dst_vli, unsigned int ndigits,
+		       const u8 *src_buf, unsigned int buf_len)
+{
+	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	u8 *vli = (u8 *)dst_vli;
+	int i;
+
+	for (i = 0; i < buf_len && i < nbytes; i++)
+		vli[i] = src_buf[i];
+
+	for (; i < nbytes; i++)
+		vli[i] = 0;
+}
+
 /* Returns sign of left - right. */
 static int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
 {
@@ -440,6 +476,83 @@ static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
 		vli_add(result, result, mod, ndigits);
 }
 
+/* Computes result = input % mod.
+ * Assumes that input < mod, result != mod.
+ */
+void vli_mod(u64 *result, const u64 *input, const u64 *mod,
+	     unsigned int ndigits)
+{
+	if (vli_cmp(input, mod, ndigits) >= 0)
+		vli_sub(result, input, mod, ndigits);
+	else
+		vli_set(result, input, ndigits);
+}
+
+/* Print vli in big-endian format.
+ * The bytes are printed in hex.
+ */
+void vli_print(char *vli_name, const u64 *vli, unsigned int ndigits)
+{
+	int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	int buf_size = 2 * ECC_MAX_DIGIT_BYTES + 1;
+	unsigned char *c, buf[buf_size];
+	int i, j;
+
+	c = (unsigned char *)vli;
+
+	for (i = nbytes - 1, j = 0; i >= 0 && j < buf_size; i--, j += 2)
+		snprintf(&buf[j], 3, "%02x", *(c + i));
+
+	buf[j] = '\0';
+
+	pr_info("%20s(BigEnd)=%s\n", vli_name, buf);
+}
+
+/* Computes result = (left * right) % mod.
+ * Assumes that left < mod and right < mod, result != mod.
+ * Uses:
+ *	(a * b) % m = ((a % m) * (b % m)) % m
+ *	(a * b) % m = (a + a + ... + a) % m = b modular additions of (a % m)
+ */
+void vli_mod_mult(u64 *result, const u64 *left, const u64 *right,
+		  const u64 *mod, unsigned int ndigits)
+{
+	u64 t1[ndigits], mm[ndigits];
+	u64 aa[ndigits], bb[ndigits];
+
+	vli_clear(result, ndigits);
+	vli_set(aa, left, ndigits);
+	vli_set(bb, right, ndigits);
+	vli_set(mm, mod, ndigits);
+
+	/* aa = aa % mm */
+	vli_mod(aa, aa, mm, ndigits);
+
+	/* bb = bb % mm */
+	vli_mod(bb, bb, mm, ndigits);
+
+	while (!vli_is_zero(bb, ndigits)) {
+
+		/* if bb is odd i.e. 0th bit set then add
+		 * aa i.e. result = (result + aa) % mm
+		 */
+		if (vli_test_bit(bb, 0))
+			vli_mod_add(result, result, aa, mm, ndigits);
+
+		/* bb = bb / 2 = bb >> 1 */
+		vli_rshift1(bb, ndigits);
+
+		/* aa = (aa * 2) % mm */
+		vli_sub(t1, mm, aa, ndigits);
+		if (vli_cmp(aa, t1, ndigits) == -1)
+			/* if aa < t1 then aa = aa * 2 = aa << 1*/
+			vli_lshift(aa, aa, 1, ndigits);
+		else
+			/* if aa >= t1 then aa = aa - t1 */
+			vli_sub(aa, aa, t1, ndigits);
+	}
+}
+
 /* Computes p_result = p_product % curve_p.
  * See algorithm 5 and 6 from
  * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf
@@ -878,6 +991,61 @@ static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
 	vli_set(x1, t7, ndigits);
 }
 
+/* Point addition.
+ * Add 2 distinct points on elliptic curve to get a new point.
+ *
+ * P = (x1,y1)and Q = (x2, y2) then P + Q = (x3,y3) where
+ * x3 = ((y2-y1)/(x2-x1))^2 - x1 - x2
+ * y3 = ((y2-y1)/(x2-x1))(x1-x3) - y1
+ *
+ * Q => P + Q
+ */
+void ecc_point_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
+		   unsigned int ndigits)
+{
+	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
+	u64 t5[ndigits];
+	u64 t6[ndigits];
+	u64 t7[ndigits];
+
+	/* t6 = x2 - x1 */
+	vli_mod_sub(t6, x2, x1, curve_prime, ndigits);
+	/* t6 = (x2 - x1)^2 = A */
+	vli_mod_square_fast(t6, t6, curve_prime, ndigits);
+	vli_mod_inv(t7, t6, curve_prime, ndigits);
+	/* t5 = x2 - x1 */
+	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
+	/* t5 = (x2 - x1)^2 = A */
+	vli_mod_square_fast(t5, t5, curve_prime, ndigits);
+	/* t1 = x1*A = B = x1*(x2-x1)^2*/
+	vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits);
+	/* t3 = x2*A = C = x2*(x2-x1)^2*/
+	vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits);
+	/* t4 = y2 - y1 */
+	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
+	/* t5 = (y2 - y1)^2 = D */
+	vli_mod_square_fast(t5, y2, curve_prime, ndigits);
+
+	/* t5 = D - B = (y2 - y1)^2 - x1*(x2-x1)^2 */
+	vli_mod_sub(t5, t5, x1, curve_prime, ndigits);
+	/* t5 = D - B - C = x3 = (y2 - y1)^2 - x1*(x2-x1)^2 - x2*(x2-x1)^2*/
+	vli_mod_sub(t5, t5, x2, curve_prime, ndigits);
+
+	/* t3 = C - B = x2*(x2-x1)^2 - x1*(x2-x1)^2 */
+	vli_mod_sub(x2, x2, x1, curve_prime, ndigits);
+	/* t2 = y1*(C - B) = y1*(x2*(x2-x1)^2 - x1*(x2-x1)^2)*/
+	vli_mod_mult_fast(y1, y1, x2, curve_prime, ndigits);
+	/* t3 = B - x3 = x1*(x2-x1)^2 - x3*/
+	vli_mod_sub(x2, x1, t5, curve_prime, ndigits);
+	/* t4 = (y2 - y1)*(B - x3)  = (y2 - y1)*(x1*(x2-x1)^2 - x3)*/
+	vli_mod_mult_fast(y2, y2, x2, curve_prime, ndigits);
+	/* t4 = y3 = ((y2 - y1)*(x1*(x2-x1)^2 - x3)) - y1*/
+	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
+
+	vli_mod_mult_fast(t5, t5, t7,  curve_prime, ndigits);
+	vli_set(x2, t5, ndigits);
+}
+
 static void ecc_point_mult(struct ecc_point *result,
 			   const struct ecc_point *point, const u64 *scalar,
 			   u64 *initial_z, u64 *curve_prime,
-- 
1.7.6.3

^ permalink raw reply related

* [PATCH 1/6] crypto: ecc: separate out ecc and ecdh
From: Nitin Kumbhar @ 2017-01-20 11:35 UTC (permalink / raw)
  To: herbert, davem; +Cc: linux-crypto, Nitin Kumbhar
In-Reply-To: <1484912161-5932-1-git-send-email-nkumbhar@nvidia.com>

Add ECC operations i.e. vli and ecc point ops as a separate
module under CRYPTO_ECC kconfig. This allows other ECC
algorithms like ECDSA & ECIES to reuse these ops even when ECDH
is disabled with CRYPTO_ECDH kconfig.

With these changes, ECDH specific functions are consolidated
as ECDH helper routines and ECC curves are moved to ECC specific
files.

Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
---
 crypto/Kconfig          |    7 +++
 crypto/Makefile         |    5 +-
 crypto/ecc.c            |  133 ++++++++++++++--------------------------------
 crypto/ecc.h            |   45 ++--------------
 crypto/ecc_curve_defs.h |   51 ++++--------------
 crypto/ecc_ecdh.h       |   54 +++++++++++++++++++
 crypto/ecdh.c           |    4 +-
 crypto/ecdh_helper.c    |   94 +++++++++++++++++++++++++++++++++
 include/crypto/ecc.h    |   24 +++++++++
 include/crypto/ecdh.h   |   10 +---
 10 files changed, 243 insertions(+), 184 deletions(-)
 create mode 100644 crypto/ecc_ecdh.h
 create mode 100644 include/crypto/ecc.h

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 160f08e721cc..e240075d6f46 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -127,9 +127,16 @@ config CRYPTO_DH
 	help
 	  Generic implementation of the Diffie-Hellman algorithm.
 
+config CRYPTO_ECC
+	tristate "ECC functions"
+	help
+	  Implementation of ECC functions
+
+
 config CRYPTO_ECDH
 	tristate "ECDH algorithm"
 	select CRYTPO_KPP
+	select CRYPTO_ECC
 	help
 	  Generic implementation of the ECDH algorithm
 
diff --git a/crypto/Makefile b/crypto/Makefile
index b8f0e3eb0791..827740a47a37 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -33,8 +33,9 @@ obj-$(CONFIG_CRYPTO_KPP2) += kpp.o
 dh_generic-y := dh.o
 dh_generic-y += dh_helper.o
 obj-$(CONFIG_CRYPTO_DH) += dh_generic.o
-ecdh_generic-y := ecc.o
-ecdh_generic-y += ecdh.o
+
+obj-$(CONFIG_CRYPTO_ECC) += ecc.o
+ecdh_generic-y := ecdh.o
 ecdh_generic-y += ecdh_helper.o
 obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o
 
diff --git a/crypto/ecc.c b/crypto/ecc.c
index 414c78a9c214..a8c10e725138 100644
--- a/crypto/ecc.c
+++ b/crypto/ecc.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2013, Kenneth MacKay
  * All rights reserved.
+ * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
@@ -28,16 +29,54 @@
 #include <linux/slab.h>
 #include <linux/swab.h>
 #include <linux/fips.h>
-#include <crypto/ecdh.h>
 
 #include "ecc.h"
-#include "ecc_curve_defs.h"
 
 typedef struct {
 	u64 m_low;
 	u64 m_high;
 } uint128_t;
 
+/* NIST P-192 */
+static u64 nist_p192_g_x[] = { 0xF4FF0AFD82FF1012ull, 0x7CBF20EB43A18800ull,
+				0x188DA80EB03090F6ull };
+static u64 nist_p192_g_y[] = { 0x73F977A11E794811ull, 0x631011ED6B24CDD5ull,
+				0x07192B95FFC8DA78ull };
+static u64 nist_p192_p[] = { 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFEull,
+				0xFFFFFFFFFFFFFFFFull };
+static u64 nist_p192_n[] = { 0x146BC9B1B4D22831ull, 0xFFFFFFFF99DEF836ull,
+				0xFFFFFFFFFFFFFFFFull };
+static struct ecc_curve nist_p192 = {
+	.name = "nist_192",
+	.g = {
+		.x = nist_p192_g_x,
+		.y = nist_p192_g_y,
+		.ndigits = 3,
+	},
+	.p = nist_p192_p,
+	.n = nist_p192_n
+};
+
+/* NIST P-256 */
+static u64 nist_p256_g_x[] = { 0xF4A13945D898C296ull, 0x77037D812DEB33A0ull,
+				0xF8BCE6E563A440F2ull, 0x6B17D1F2E12C4247ull };
+static u64 nist_p256_g_y[] = { 0xCBB6406837BF51F5ull, 0x2BCE33576B315ECEull,
+				0x8EE7EB4A7C0F9E16ull, 0x4FE342E2FE1A7F9Bull };
+static u64 nist_p256_p[] = { 0xFFFFFFFFFFFFFFFFull, 0x00000000FFFFFFFFull,
+				0x0000000000000000ull, 0xFFFFFFFF00000001ull };
+static u64 nist_p256_n[] = { 0xF3B9CAC2FC632551ull, 0xBCE6FAADA7179E84ull,
+				0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00000000ull };
+static struct ecc_curve nist_p256 = {
+	.name = "nist_256",
+	.g = {
+		.x = nist_p256_g_x,
+		.y = nist_p256_g_y,
+		.ndigits = 4,
+	},
+	.p = nist_p256_p,
+	.n = nist_p256_n
+};
+
 static inline const struct ecc_curve *ecc_get_curve(unsigned int curve_id)
 {
 	switch (curve_id) {
@@ -926,93 +965,3 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
 
 	return 0;
 }
-
-int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits,
-		      const u8 *private_key, unsigned int private_key_len,
-		      u8 *public_key, unsigned int public_key_len)
-{
-	int ret = 0;
-	struct ecc_point *pk;
-	u64 priv[ndigits];
-	unsigned int nbytes;
-	const struct ecc_curve *curve = ecc_get_curve(curve_id);
-
-	if (!private_key || !curve) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ecc_swap_digits((const u64 *)private_key, priv, ndigits);
-
-	pk = ecc_alloc_point(ndigits);
-	if (!pk) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ecc_point_mult(pk, &curve->g, priv, NULL, curve->p, ndigits);
-	if (ecc_point_is_zero(pk)) {
-		ret = -EAGAIN;
-		goto err_free_point;
-	}
-
-	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
-	ecc_swap_digits(pk->x, (u64 *)public_key, ndigits);
-	ecc_swap_digits(pk->y, (u64 *)&public_key[nbytes], ndigits);
-
-err_free_point:
-	ecc_free_point(pk);
-out:
-	return ret;
-}
-
-int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
-		       const u8 *private_key, unsigned int private_key_len,
-		       const u8 *public_key, unsigned int public_key_len,
-		       u8 *secret, unsigned int secret_len)
-{
-	int ret = 0;
-	struct ecc_point *product, *pk;
-	u64 priv[ndigits];
-	u64 rand_z[ndigits];
-	unsigned int nbytes;
-	const struct ecc_curve *curve = ecc_get_curve(curve_id);
-
-	if (!private_key || !public_key || !curve) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
-
-	get_random_bytes(rand_z, nbytes);
-
-	pk = ecc_alloc_point(ndigits);
-	if (!pk) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	product = ecc_alloc_point(ndigits);
-	if (!product) {
-		ret = -ENOMEM;
-		goto err_alloc_product;
-	}
-
-	ecc_swap_digits((const u64 *)public_key, pk->x, ndigits);
-	ecc_swap_digits((const u64 *)&public_key[nbytes], pk->y, ndigits);
-	ecc_swap_digits((const u64 *)private_key, priv, ndigits);
-
-	ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits);
-
-	ecc_swap_digits(product->x, (u64 *)secret, ndigits);
-
-	if (ecc_point_is_zero(product))
-		ret = -EFAULT;
-
-	ecc_free_point(product);
-err_alloc_product:
-	ecc_free_point(pk);
-out:
-	return ret;
-}
diff --git a/crypto/ecc.h b/crypto/ecc.h
index 663d598c7406..5db82223d485 100644
--- a/crypto/ecc.h
+++ b/crypto/ecc.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2013, Kenneth MacKay
  * All rights reserved.
+ * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
@@ -26,9 +27,9 @@
 #ifndef _CRYPTO_ECC_H
 #define _CRYPTO_ECC_H
 
-#define ECC_MAX_DIGITS	4 /* 256 */
+#include <crypto/ecc.h>
 
-#define ECC_DIGITS_TO_BYTES_SHIFT 3
+#include "ecc_curve_defs.h"
 
 /**
  * ecc_is_key_valid() - Validate a given ECDH private key
@@ -42,42 +43,4 @@
  */
 int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
 		     const u8 *private_key, unsigned int private_key_len);
-
-/**
- * ecdh_make_pub_key() - Compute an ECC public key
- *
- * @curve_id:		id representing the curve to use
- * @private_key:	pregenerated private key for the given curve
- * @private_key_len:	length of private_key
- * @public_key:		buffer for storing the public key generated
- * @public_key_len:	length of the public_key buffer
- *
- * Returns 0 if the public key was generated successfully, a negative value
- * if an error occurred.
- */
-int ecdh_make_pub_key(const unsigned int curve_id, unsigned int ndigits,
-		      const u8 *private_key, unsigned int private_key_len,
-		      u8 *public_key, unsigned int public_key_len);
-
-/**
- * crypto_ecdh_shared_secret() - Compute a shared secret
- *
- * @curve_id:		id representing the curve to use
- * @private_key:	private key of part A
- * @private_key_len:	length of private_key
- * @public_key:		public key of counterpart B
- * @public_key_len:	length of public_key
- * @secret:		buffer for storing the calculated shared secret
- * @secret_len:		length of the secret buffer
- *
- * Note: It is recommended that you hash the result of crypto_ecdh_shared_secret
- * before using it for symmetric encryption or HMAC.
- *
- * Returns 0 if the shared secret was generated successfully, a negative value
- * if an error occurred.
- */
-int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
-		       const u8 *private_key, unsigned int private_key_len,
-		       const u8 *public_key, unsigned int public_key_len,
-		       u8 *secret, unsigned int secret_len);
-#endif
+#endif /* _CRYPTO_ECC_H */
diff --git a/crypto/ecc_curve_defs.h b/crypto/ecc_curve_defs.h
index 03ae5f714028..baacf32bca16 100644
--- a/crypto/ecc_curve_defs.h
+++ b/crypto/ecc_curve_defs.h
@@ -1,3 +1,12 @@
+/*
+ * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
 #ifndef _CRYTO_ECC_CURVE_DEFS_H
 #define _CRYTO_ECC_CURVE_DEFS_H
 
@@ -14,44 +23,4 @@ struct ecc_curve {
 	u64 *n;
 };
 
-/* NIST P-192 */
-static u64 nist_p192_g_x[] = { 0xF4FF0AFD82FF1012ull, 0x7CBF20EB43A18800ull,
-				0x188DA80EB03090F6ull };
-static u64 nist_p192_g_y[] = { 0x73F977A11E794811ull, 0x631011ED6B24CDD5ull,
-				0x07192B95FFC8DA78ull };
-static u64 nist_p192_p[] = { 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFEull,
-				0xFFFFFFFFFFFFFFFFull };
-static u64 nist_p192_n[] = { 0x146BC9B1B4D22831ull, 0xFFFFFFFF99DEF836ull,
-				0xFFFFFFFFFFFFFFFFull };
-static struct ecc_curve nist_p192 = {
-	.name = "nist_192",
-	.g = {
-		.x = nist_p192_g_x,
-		.y = nist_p192_g_y,
-		.ndigits = 3,
-	},
-	.p = nist_p192_p,
-	.n = nist_p192_n
-};
-
-/* NIST P-256 */
-static u64 nist_p256_g_x[] = { 0xF4A13945D898C296ull, 0x77037D812DEB33A0ull,
-				0xF8BCE6E563A440F2ull, 0x6B17D1F2E12C4247ull };
-static u64 nist_p256_g_y[] = { 0xCBB6406837BF51F5ull, 0x2BCE33576B315ECEull,
-				0x8EE7EB4A7C0F9E16ull, 0x4FE342E2FE1A7F9Bull };
-static u64 nist_p256_p[] = { 0xFFFFFFFFFFFFFFFFull, 0x00000000FFFFFFFFull,
-				0x0000000000000000ull, 0xFFFFFFFF00000001ull };
-static u64 nist_p256_n[] = { 0xF3B9CAC2FC632551ull, 0xBCE6FAADA7179E84ull,
-				0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00000000ull };
-static struct ecc_curve nist_p256 = {
-	.name = "nist_256",
-	.g = {
-		.x = nist_p256_g_x,
-		.y = nist_p256_g_y,
-		.ndigits = 4,
-	},
-	.p = nist_p256_p,
-	.n = nist_p256_n
-};
-
-#endif
+#endif /* _CRYTO_ECC_CURVE_DEFS_H */
diff --git a/crypto/ecc_ecdh.h b/crypto/ecc_ecdh.h
new file mode 100644
index 000000000000..f77b1fe094c9
--- /dev/null
+++ b/crypto/ecc_ecdh.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2013, Kenneth MacKay. All rights reserved.
+ * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#ifndef _CRYPTO_ECC_ECDH_H
+#define _CRYPTO_ECC_ECDH_H
+
+#include "ecc.h"
+
+/**
+ * ecdh_make_pub_key() - Compute an ECC public key
+ *
+ * @curve_id:		id representing the curve to use
+ * @private_key:	pregenerated private key for the given curve
+ * @private_key_len:	length of private_key
+ * @public_key:		buffer for storing the public key generated
+ * @public_key_len:	length of the public_key buffer
+ *
+ * Returns 0 if the public key was generated successfully, a negative value
+ * if an error occurred.
+ */
+int ecdh_make_pub_key(const unsigned int curve_id, unsigned int ndigits,
+		      const u8 *private_key, unsigned int private_key_len,
+		      u8 *public_key, unsigned int public_key_len);
+
+/**
+ * crypto_ecdh_shared_secret() - Compute a shared secret
+ *
+ * @curve_id:		id representing the curve to use
+ * @private_key:	private key of part A
+ * @private_key_len:	length of private_key
+ * @public_key:		public key of counterpart B
+ * @public_key_len:	length of public_key
+ * @secret:		buffer for storing the calculated shared secret
+ * @secret_len:		length of the secret buffer
+ *
+ * Note: It is recommended that you hash the result of crypto_ecdh_shared_secret
+ * before using it for symmetric encryption or HMAC.
+ *
+ * Returns 0 if the shared secret was generated successfully, a negative value
+ * if an error occurred.
+ */
+int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
+		       const u8 *private_key, unsigned int private_key_len,
+		       const u8 *public_key, unsigned int public_key_len,
+		       u8 *secret, unsigned int secret_len);
+
+#endif /* _CRYPTO_ECC_ECDH_H */
diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index 3de289806d67..2b83ff3a4b9a 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -2,6 +2,7 @@
  *
  * Copyright (c) 2016, Intel Corporation
  * Authors: Salvator Benedetto <salvatore.benedetto@intel.com>
+ * Copyright (c) 2017, NVIDIA Corporation.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public Licence
@@ -14,7 +15,8 @@
 #include <crypto/kpp.h>
 #include <crypto/ecdh.h>
 #include <linux/scatterlist.h>
-#include "ecc.h"
+
+#include "ecc_ecdh.h"
 
 struct ecdh_ctx {
 	unsigned int curve_id;
diff --git a/crypto/ecdh_helper.c b/crypto/ecdh_helper.c
index 3cd8a2414e60..b3857f3bfcee 100644
--- a/crypto/ecdh_helper.c
+++ b/crypto/ecdh_helper.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2016, Intel Corporation
  * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com>
+ * Copyright (c) 2017, NVIDIA Corporation.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public Licence
@@ -11,9 +12,12 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/string.h>
+#include <linux/random.h>
 #include <crypto/ecdh.h>
 #include <crypto/kpp.h>
 
+#include "ecc_ecdh.h"
+
 #define ECDH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 2 * sizeof(short))
 
 static inline u8 *ecdh_pack_data(void *dst, const void *src, size_t sz)
@@ -28,6 +32,96 @@
 	return src + sz;
 }
 
+int ecdh_make_pub_key(unsigned int curve_id, unsigned int ndigits,
+		      const u8 *private_key, unsigned int private_key_len,
+		      u8 *public_key, unsigned int public_key_len)
+{
+	int ret = 0;
+	struct ecc_point *pk;
+	u64 priv[ndigits];
+	unsigned int nbytes;
+	const struct ecc_curve *curve = ecc_get_curve(curve_id);
+
+	if (!private_key || !curve) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ecc_swap_digits((const u64 *)private_key, priv, ndigits);
+
+	pk = ecc_alloc_point(ndigits);
+	if (!pk) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ecc_point_mult(pk, &curve->g, priv, NULL, curve->p, ndigits);
+	if (ecc_point_is_zero(pk)) {
+		ret = -EAGAIN;
+		goto err_free_point;
+	}
+
+	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	ecc_swap_digits(pk->x, (u64 *)public_key, ndigits);
+	ecc_swap_digits(pk->y, (u64 *)&public_key[nbytes], ndigits);
+
+err_free_point:
+	ecc_free_point(pk);
+out:
+	return ret;
+}
+
+int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
+		       const u8 *private_key, unsigned int private_key_len,
+		       const u8 *public_key, unsigned int public_key_len,
+		       u8 *secret, unsigned int secret_len)
+{
+	int ret = 0;
+	struct ecc_point *product, *pk;
+	u64 priv[ndigits];
+	u64 rand_z[ndigits];
+	unsigned int nbytes;
+	const struct ecc_curve *curve = ecc_get_curve(curve_id);
+
+	if (!private_key || !public_key || !curve) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+
+	get_random_bytes(rand_z, nbytes);
+
+	pk = ecc_alloc_point(ndigits);
+	if (!pk) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	product = ecc_alloc_point(ndigits);
+	if (!product) {
+		ret = -ENOMEM;
+		goto err_alloc_product;
+	}
+
+	ecc_swap_digits((const u64 *)public_key, pk->x, ndigits);
+	ecc_swap_digits((const u64 *)&public_key[nbytes], pk->y, ndigits);
+	ecc_swap_digits((const u64 *)private_key, priv, ndigits);
+
+	ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits);
+
+	ecc_swap_digits(product->x, (u64 *)secret, ndigits);
+
+	if (ecc_point_is_zero(product))
+		ret = -EFAULT;
+
+	ecc_free_point(product);
+err_alloc_product:
+	ecc_free_point(pk);
+out:
+	return ret;
+}
+
 int crypto_ecdh_key_len(const struct ecdh *params)
 {
 	return ECDH_KPP_SECRET_MIN_SIZE + params->key_size;
diff --git a/include/crypto/ecc.h b/include/crypto/ecc.h
new file mode 100644
index 000000000000..27957f805fd6
--- /dev/null
+++ b/include/crypto/ecc.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_ECC_
+#define _CRYPTO_ECC_
+
+/* Curves IDs */
+#define ECC_CURVE_NIST_P192	0x0001
+#define ECC_CURVE_NIST_P256	0x0002
+
+#define ECC_MAX_DIGITS	4 /* 256 */
+
+#define ECC_DIGITS_TO_BYTES_SHIFT 3
+
+#define ECC_MAX_DIGIT_BYTES (ECC_MAX_DIGITS << ECC_DIGITS_TO_BYTES_SHIFT)
+
+#endif /* _CRYPTO_ECC_ */
diff --git a/include/crypto/ecdh.h b/include/crypto/ecdh.h
index 03a64f62ba7a..c8556305acad 100644
--- a/include/crypto/ecdh.h
+++ b/include/crypto/ecdh.h
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2016, Intel Corporation
  * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com>
+ * Copyright (c) 2017, NVIDIA Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -13,24 +14,19 @@
 #ifndef _CRYPTO_ECDH_
 #define _CRYPTO_ECDH_
 
+#include <crypto/ecc.h>
+
 /**
  * DOC: ECDH Helper Functions
  *
  * To use ECDH with the KPP cipher API, the following data structure and
  * functions should be used.
  *
- * The ECC curves known to the ECDH implementation are specified in this
- * header file.
- *
  * To use ECDH with KPP, the following functions should be used to operate on
  * an ECDH private key. The packet private key that can be set with
  * the KPP API function call of crypto_kpp_set_secret.
  */
 
-/* Curves IDs */
-#define ECC_CURVE_NIST_P192	0x0001
-#define ECC_CURVE_NIST_P256	0x0002
-
 /**
  * struct ecdh - define an ECDH private key
  *
-- 
1.7.6.3

^ permalink raw reply related

* [PATCH 4/6] crypto: ecdsa: add ECDSA SW implementation
From: Nitin Kumbhar @ 2017-01-20 11:35 UTC (permalink / raw)
  To: herbert, davem; +Cc: linux-crypto, Nitin Kumbhar
In-Reply-To: <1484912161-5932-1-git-send-email-nkumbhar@nvidia.com>

This adds support for ECDSA algorithm. This implementation supports
sign and verify functions for ECDSA algorithm using akcipher. As ECDSA
is a signing algorithm dummy functions are added for encrypt() and
decrypt().

Helper routines for parsing public and private ECC keys for ECDSA are
added as well.

Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
---
 crypto/Kconfig            |    7 +
 crypto/Makefile           |    3 +
 crypto/ecdsa.c            |  331 +++++++++++++++++++++++++++++++++++++++++++++
 crypto/ecdsa_helper.c     |  116 ++++++++++++++++
 include/crypto/akcipher.h |    5 +-
 include/crypto/ecdsa.h    |   29 ++++
 6 files changed, 490 insertions(+), 1 deletions(-)
 create mode 100644 crypto/ecdsa.c
 create mode 100644 crypto/ecdsa_helper.c
 create mode 100644 include/crypto/ecdsa.h

diff --git a/crypto/Kconfig b/crypto/Kconfig
index e240075d6f46..1c5c236b3bbc 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -140,6 +140,13 @@ config CRYPTO_ECDH
 	help
 	  Generic implementation of the ECDH algorithm
 
+config CRYPTO_ECDSA
+	tristate "ECDSA algorithm"
+	select CRYPTO_AKCIPHER
+	select CRYPTO_ECC
+	help
+	  Generic implementation of the ECDSA algorithm
+
 config CRYPTO_MANAGER
 	tristate "Cryptographic algorithm manager"
 	select CRYPTO_MANAGER2
diff --git a/crypto/Makefile b/crypto/Makefile
index 827740a47a37..9c13eb2ade6a 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -38,6 +38,9 @@ obj-$(CONFIG_CRYPTO_ECC) += ecc.o
 ecdh_generic-y := ecdh.o
 ecdh_generic-y += ecdh_helper.o
 obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o
+ecdsa_generic-y := ecdsa.o
+ecdsa_generic-y += ecdsa_helper.o
+obj-$(CONFIG_CRYPTO_ECDSA) += ecdsa_generic.o
 
 $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h
 $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h
diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c
new file mode 100644
index 000000000000..d415900af3bd
--- /dev/null
+++ b/crypto/ecdsa.c
@@ -0,0 +1,331 @@
+/*
+ * ECDSA generic algorithm
+ *
+ * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/random.h>
+#include <crypto/internal/akcipher.h>
+#include <crypto/akcipher.h>
+#include <crypto/ecdsa.h>
+
+#include "ecc.h"
+
+struct ecdsa_ctx {
+	unsigned int curve_id;
+	unsigned int ndigits;
+	u64 private_key[ECC_MAX_DIGITS];
+	u64 public_key[2 * ECC_MAX_DIGITS];
+};
+
+static inline struct ecdsa_ctx *ecdsa_get_ctx(struct crypto_akcipher *tfm)
+{
+	return akcipher_tfm_ctx(tfm);
+}
+
+static void ecdsa_parse_msg_hash(struct akcipher_request *req, u64 *msg,
+				 unsigned int ndigits)
+{
+	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	unsigned int hash_len, hash_off;
+	unsigned char *hash, *msg_ptr;
+	int i;
+
+	/*
+	 * If hash_len == nbytes:
+	 *	copy nbytes from req
+	 * If hash_len > nbytes:
+	 *	copy left most nbytes from hash ignoring LSBs
+	 * If hash_len < nbytes:
+	 *	copy hash_len from req and zero remaining bytes
+	 *	(nbytes - hash_len)
+	 */
+	hash_len = req->src[0].length;
+	hash_off = hash_len <= nbytes ? 0 : hash_len - nbytes;
+
+	msg_ptr = (unsigned char *)msg;
+	hash = sg_virt(&req->src[0]);
+
+	for (i = hash_off; i < hash_len; i++)
+		*msg_ptr++ = hash[i];
+	for (; i < nbytes; i++)
+		*msg_ptr++ = 0;
+}
+
+int ecdsa_sign(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct ecdsa_ctx *ctx = ecdsa_get_ctx(tfm);
+	unsigned int ndigits = ctx->ndigits;
+	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	unsigned int curve_id = ctx->curve_id;
+	const struct ecc_curve *curve = ecc_get_curve(curve_id);
+	struct ecc_point *x1y1 = NULL;
+	u64 z[ndigits], d[ndigits];
+	u64 k[ndigits], k_inv[ndigits];
+	u64 r[ndigits], s[ndigits];
+	u64 dr[ndigits], zdr[ndigits];
+	u8 *r_ptr, *s_ptr;
+
+	if (req->dst_len < 2 * nbytes) {
+		req->dst_len = 2 * nbytes;
+		return -EINVAL;
+	}
+
+	ecdsa_parse_msg_hash(req, z, ndigits);
+
+	/* d */
+	vli_set(d, (const u64 *)ctx->private_key, ndigits);
+
+	/* k */
+#if defined(CONFIG_CRYPTO_MANAGER2)
+	if (req->info) {
+		vli_copy_from_buf(k, ndigits, req->info, nbytes);
+	} else
+#endif
+		get_random_bytes(k, nbytes);
+
+	x1y1 = ecc_alloc_point(ndigits);
+	if (!x1y1)
+		return -ENOMEM;
+
+	/* (x1, y1) = k x G */
+	ecc_point_mult(x1y1, &curve->g, k, NULL, curve->p, ndigits);
+
+	/* r = x1 mod n */
+	vli_mod(r, x1y1->x, curve->n, ndigits);
+
+	/* k^-1 */
+	vli_mod_inv(k_inv, k, curve->n, ndigits);
+
+	/* d . r mod n */
+	vli_mod_mult(dr, d, r, curve->n, ndigits);
+
+	/* z + dr mod n */
+	vli_mod_add(zdr, z, dr, curve->n, ndigits);
+
+	/* k^-1 . ( z + dr) mod n */
+	vli_mod_mult(s, k_inv, zdr, curve->n, ndigits);
+
+	/* write signature (r,s) in dst */
+	r_ptr = sg_virt(req->dst);
+	s_ptr = (u8 *)sg_virt(req->dst) + nbytes;
+
+	vli_copy_to_buf(r_ptr, nbytes, r, ndigits);
+	vli_copy_to_buf(s_ptr, nbytes, s, ndigits);
+
+	req->dst_len = 2 * nbytes;
+
+	ecc_free_point(x1y1);
+	return 0;
+}
+
+int ecdsa_verify(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct ecdsa_ctx *ctx = ecdsa_get_ctx(tfm);
+	unsigned int ndigits = ctx->ndigits;
+	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+	unsigned int curve_id = ctx->curve_id;
+	const struct ecc_curve *curve = ecc_get_curve(curve_id);
+	struct ecc_point *x1y1 = NULL, *x2y2 = NULL, *Q = NULL;
+	u64 r[ndigits], s[ndigits], v[ndigits];
+	u64 z[ndigits], w[ndigits];
+	u64 u1[ndigits], u2[ndigits];
+	u64 x1[ndigits], x2[ndigits];
+	u64 y1[ndigits], y2[ndigits];
+	u64 *ctx_qx, *ctx_qy;
+	int ret;
+
+	x1y1 = ecc_alloc_point(ndigits);
+	x2y2 = ecc_alloc_point(ndigits);
+	Q = ecc_alloc_point(ndigits);
+	if (!x1y1 || !x2y2 || !Q) {
+		ret = -ENOMEM;
+		goto exit;
+	}
+
+	ecdsa_parse_msg_hash(req, z, ndigits);
+
+	/* Signature r,s */
+	vli_copy_from_buf(r, ndigits, sg_virt(&req->src[1]), nbytes);
+	vli_copy_from_buf(s, ndigits, sg_virt(&req->src[2]), nbytes);
+
+	/* w = s^-1 mod n */
+	vli_mod_inv(w, s, curve->n, ndigits);
+
+	/* u1 = zw mod n */
+	vli_mod_mult(u1, z, w, curve->n, ndigits);
+
+	/* u2 = rw mod n */
+	vli_mod_mult(u2, r, w, curve->n, ndigits);
+
+	/* u1 . G */
+	ecc_point_mult(x1y1, &curve->g, u1, NULL, curve->p, ndigits);
+
+	/* Q=(Qx,Qy) */
+	ctx_qx = ctx->public_key;
+	ctx_qy = ctx_qx + ECC_MAX_DIGITS;
+	vli_set(Q->x, ctx_qx, ndigits);
+	vli_set(Q->y, ctx_qy, ndigits);
+
+	/* u2 x Q */
+	ecc_point_mult(x2y2, Q, u2, NULL, curve->p, ndigits);
+
+	vli_set(x1, x1y1->x, ndigits);
+	vli_set(y1, x1y1->y, ndigits);
+	vli_set(x2, x2y2->x, ndigits);
+	vli_set(y2, x2y2->y, ndigits);
+
+	/* x1y1 + x2y2 => P + Q; P + Q in x2 y2 */
+	ecc_point_add(x1, y1, x2, y2, curve->p, ndigits);
+
+	/* v = x mod n */
+	vli_mod(v, x2, curve->n, ndigits);
+
+	/* validate signature */
+	ret = vli_cmp(v, r, ndigits) == 0 ? 0 : -EBADMSG;
+ exit:
+	ecc_free_point(x1y1);
+	ecc_free_point(x2y2);
+	ecc_free_point(Q);
+	return ret;
+}
+
+int ecdsa_dummy_enc(struct akcipher_request *req)
+{
+	return -EINVAL;
+}
+
+int ecdsa_dummy_dec(struct akcipher_request *req)
+{
+	return -EINVAL;
+}
+
+int ecdsa_set_pub_key(struct crypto_akcipher *tfm, const void *key,
+		      unsigned int keylen)
+{
+	struct ecdsa_ctx *ctx = ecdsa_get_ctx(tfm);
+	struct ecdsa params;
+	unsigned int ndigits;
+	unsigned int nbytes;
+	u8 *params_qx, *params_qy;
+	u64 *ctx_qx, *ctx_qy;
+	int ret = 0;
+
+	if (crypto_ecdsa_parse_pub_key(key, keylen, &params))
+		return -EINVAL;
+
+	ndigits = ecdsa_supported_curve(params.curve_id);
+
+	ctx->curve_id = params.curve_id;
+	ctx->ndigits = ndigits;
+	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+
+	params_qx = params.key;
+	params_qy = params_qx + ECC_MAX_DIGIT_BYTES;
+
+	ctx_qx = ctx->public_key;
+	ctx_qy = ctx_qx + ECC_MAX_DIGITS;
+
+	vli_copy_from_buf(ctx_qx, ndigits, params_qx, nbytes);
+	vli_copy_from_buf(ctx_qy, ndigits, params_qy, nbytes);
+
+	memset(&params, 0, sizeof(params));
+	return ret;
+}
+
+int ecdsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
+		       unsigned int keylen)
+{
+	struct ecdsa_ctx *ctx = ecdsa_get_ctx(tfm);
+	struct ecdsa params;
+	unsigned int ndigits;
+	unsigned int nbytes;
+
+	if (crypto_ecdsa_parse_priv_key(key, keylen, &params))
+		return -EINVAL;
+
+	ndigits = ecdsa_supported_curve(params.curve_id);
+
+	ctx->curve_id = params.curve_id;
+	ctx->ndigits = ndigits;
+	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+
+	if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits,
+			     (const u8 *)params.key, params.key_size) < 0)
+		return -EINVAL;
+
+	vli_copy_from_buf(ctx->private_key, ndigits, params.key, nbytes);
+
+	memset(&params, 0, sizeof(params));
+	return 0;
+}
+
+int ecdsa_max_size(struct crypto_akcipher *tfm)
+{
+	struct ecdsa_ctx *ctx = ecdsa_get_ctx(tfm);
+	int nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+
+	/* For r,s */
+	return 2 * nbytes;
+}
+
+int ecdsa_init_tfm(struct crypto_akcipher *tfm)
+{
+	return 0;
+}
+
+void ecdsa_exit_tfm(struct crypto_akcipher *tfm)
+{
+}
+
+static struct akcipher_alg ecdsa_alg = {
+	.sign		= ecdsa_sign,
+	.verify		= ecdsa_verify,
+	.encrypt	= ecdsa_dummy_enc,
+	.decrypt	= ecdsa_dummy_dec,
+	.set_priv_key	= ecdsa_set_priv_key,
+	.set_pub_key	= ecdsa_set_pub_key,
+	.max_size	= ecdsa_max_size,
+	.init		= ecdsa_init_tfm,
+	.exit		= ecdsa_exit_tfm,
+	.base = {
+		.cra_name	= "ecdsa",
+		.cra_driver_name = "ecdsa-generic",
+		.cra_priority	= 100,
+		.cra_module	= THIS_MODULE,
+		.cra_ctxsize	= sizeof(struct ecdsa_ctx),
+	},
+};
+
+static int ecdsa_init(void)
+{
+	int ret;
+
+	ret = crypto_register_akcipher(&ecdsa_alg);
+	if (ret)
+		pr_err("ecdsa alg register failed. err:%d\n", ret);
+	return ret;
+}
+
+static void ecdsa_exit(void)
+{
+	crypto_unregister_akcipher(&ecdsa_alg);
+}
+
+module_init(ecdsa_init);
+module_exit(ecdsa_exit);
+
+MODULE_ALIAS_CRYPTO("ecdsa");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ECDSA Generic Algorithm");
+MODULE_AUTHOR("NVIDIA Corporation");
diff --git a/crypto/ecdsa_helper.c b/crypto/ecdsa_helper.c
new file mode 100644
index 000000000000..d31eb54431a9
--- /dev/null
+++ b/crypto/ecdsa_helper.c
@@ -0,0 +1,116 @@
+/*
+ * ECDSA helper routines
+ *
+ * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <crypto/ecdsa.h>
+
+#include "ecc.h"
+
+#define ECDSA_KEY_MIN_SIZE	(1 + 1 + 24) /* ver + cid + n (P-192) */
+
+unsigned int ecdsa_supported_curve(unsigned int curve_id)
+{
+	switch (curve_id) {
+	case ECC_CURVE_NIST_P192: return 3;
+	case ECC_CURVE_NIST_P256: return 4;
+	default: return 0;
+	}
+}
+
+static inline u8 *ecdsa_pack_data(void *dst, const void *src, size_t sz)
+{
+	memcpy(dst, src, sz);
+	return dst + sz;
+}
+
+static inline const u8 *ecdsa_unpack_data(void *dst, const void *src, size_t sz)
+{
+	memcpy(dst, src, sz);
+	return src + sz;
+}
+
+int crypto_ecdsa_parse_pub_key(const char *buf, unsigned int len,
+			       struct ecdsa *params)
+{
+	unsigned char version;
+	unsigned int ndigits;
+	unsigned int nbytes;
+	const u8 *ptr = buf;
+	u8 *qx, *qy;
+
+	if (unlikely(!buf) || len < ECDSA_KEY_MIN_SIZE)
+		return -EINVAL;
+
+	ptr = ecdsa_unpack_data(&version, ptr, sizeof(version));
+	if (version != 1)
+		return -EINVAL;
+
+	ptr = ecdsa_unpack_data(&params->curve_id, ptr,
+				sizeof(params->curve_id));
+
+	ndigits = ecdsa_supported_curve(params->curve_id);
+	if (!ndigits)
+		return -EINVAL;
+
+	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+
+	/* skip private key */
+	ptr = ecdsa_unpack_data(&params->key, ptr, nbytes);
+
+	/* copy public key */
+	qx = params->key;
+	qy = qx + ECC_MAX_DIGIT_BYTES;
+
+	ptr = ecdsa_unpack_data(qx, ptr, nbytes);
+	ptr = ecdsa_unpack_data(qy, ptr, nbytes);
+
+	params->key_size = 2 * nbytes;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_ecdsa_parse_pub_key);
+
+int crypto_ecdsa_parse_priv_key(const char *buf, unsigned int len,
+				struct ecdsa *params)
+{
+	unsigned char version;
+	unsigned int ndigits;
+	unsigned int nbytes;
+	const u8 *ptr = buf;
+
+	if (unlikely(!buf) || len < ECDSA_KEY_MIN_SIZE)
+		return -EINVAL;
+
+	ptr = ecdsa_unpack_data(&version, ptr, sizeof(version));
+	if (version != 1)
+		return -EINVAL;
+
+	ptr = ecdsa_unpack_data(&params->curve_id, ptr,
+				sizeof(params->curve_id));
+
+	ndigits = ecdsa_supported_curve(params->curve_id);
+	if (!ndigits)
+		return -EINVAL;
+
+	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+
+	params->key_size = nbytes;
+
+	/* copy private key */
+	ptr = ecdsa_unpack_data(&params->key, ptr, nbytes);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(crypto_ecdsa_parse_priv_key);
diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h
index c37cc59e9bf2..6b34e9043a6f 100644
--- a/include/crypto/akcipher.h
+++ b/include/crypto/akcipher.h
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2015, Intel Corporation
  * Authors: Tadeusz Struk <tadeusz.struk@intel.com>
+ * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -27,6 +28,7 @@
  *		result.
  *		In case of error where the dst sgl size was insufficient,
  *		it will be updated to the size required for the operation.
+ * @info:	Any request specific data needed to process the request.
  * @__ctx:	Start of private context data
  */
 struct akcipher_request {
@@ -35,6 +37,7 @@ struct akcipher_request {
 	struct scatterlist *dst;
 	unsigned int src_len;
 	unsigned int dst_len;
+	void *info;
 	void *__ctx[] CRYPTO_MINALIGN_ATTR;
 };
 
@@ -193,7 +196,7 @@ static inline void crypto_free_akcipher(struct crypto_akcipher *tfm)
 {
 	struct akcipher_request *req;
 
-	req = kmalloc(sizeof(*req) + crypto_akcipher_reqsize(tfm), gfp);
+	req = kzalloc(sizeof(*req) + crypto_akcipher_reqsize(tfm), gfp);
 	if (likely(req))
 		akcipher_request_set_tfm(req, tfm);
 
diff --git a/include/crypto/ecdsa.h b/include/crypto/ecdsa.h
new file mode 100644
index 000000000000..c71d21e654b9
--- /dev/null
+++ b/include/crypto/ecdsa.h
@@ -0,0 +1,29 @@
+/*
+ * ECC parameters for ECDSA
+ *
+ * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _CRYPTO_ECDSA_
+#define _CRYPTO_ECDSA_
+
+#include <crypto/ecc.h>
+
+struct ecdsa {
+	unsigned char curve_id;
+	unsigned char key[2 * ECC_MAX_DIGIT_BYTES];
+	unsigned short key_size;
+};
+
+unsigned int ecdsa_supported_curve(unsigned int curve_id);
+int crypto_ecdsa_parse_pub_key(const char *buf, unsigned int len,
+			       struct ecdsa *params);
+int crypto_ecdsa_parse_priv_key(const char *buf, unsigned int len,
+				struct ecdsa *params);
+#endif /* _CRYPTO_ECDSA_ */
-- 
1.7.6.3

^ permalink raw reply related

* [PATCH 3/6] crypto: ecc: export vli and ecc ops
From: Nitin Kumbhar @ 2017-01-20 11:35 UTC (permalink / raw)
  To: herbert, davem; +Cc: linux-crypto, Nitin Kumbhar
In-Reply-To: <1484912161-5932-1-git-send-email-nkumbhar@nvidia.com>

Export vli and ECC related functions so that these can
be used by all ECC algorithm modules like ECDH, ECDSA and ECIES.

Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
---
 crypto/ecc.c |  114 +++++++++++++++++++++++++++++++++++++---------------------
 crypto/ecc.h |   53 +++++++++++++++++++++++++++
 2 files changed, 126 insertions(+), 41 deletions(-)

diff --git a/crypto/ecc.c b/crypto/ecc.c
index 6ad785c4c12a..c6fe1b7b998d 100644
--- a/crypto/ecc.c
+++ b/crypto/ecc.c
@@ -77,7 +77,7 @@
 	.n = nist_p256_n
 };
 
-static inline const struct ecc_curve *ecc_get_curve(unsigned int curve_id)
+const struct ecc_curve *ecc_get_curve(unsigned int curve_id)
 {
 	switch (curve_id) {
 	/* In FIPS mode only allow P256 and higher */
@@ -89,6 +89,7 @@
 		return NULL;
 	}
 }
+EXPORT_SYMBOL_GPL(ecc_get_curve);
 
 static u64 *ecc_alloc_digits_space(unsigned int ndigits)
 {
@@ -105,7 +106,7 @@ static void ecc_free_digits_space(u64 *space)
 	kzfree(space);
 }
 
-static struct ecc_point *ecc_alloc_point(unsigned int ndigits)
+struct ecc_point *ecc_alloc_point(unsigned int ndigits)
 {
 	struct ecc_point *p = kmalloc(sizeof(*p), GFP_KERNEL);
 
@@ -130,8 +131,9 @@ static void ecc_free_digits_space(u64 *space)
 	kfree(p);
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(ecc_alloc_point);
 
-static void ecc_free_point(struct ecc_point *p)
+void ecc_free_point(struct ecc_point *p)
 {
 	if (!p)
 		return;
@@ -140,17 +142,19 @@ static void ecc_free_point(struct ecc_point *p)
 	kzfree(p->y);
 	kzfree(p);
 }
+EXPORT_SYMBOL_GPL(ecc_free_point);
 
-static void vli_clear(u64 *vli, unsigned int ndigits)
+void vli_clear(u64 *vli, unsigned int ndigits)
 {
 	int i;
 
 	for (i = 0; i < ndigits; i++)
 		vli[i] = 0;
 }
+EXPORT_SYMBOL_GPL(vli_clear);
 
 /* Returns true if vli == 0, false otherwise. */
-static bool vli_is_zero(const u64 *vli, unsigned int ndigits)
+bool vli_is_zero(const u64 *vli, unsigned int ndigits)
 {
 	int i;
 
@@ -161,15 +165,17 @@ static bool vli_is_zero(const u64 *vli, unsigned int ndigits)
 
 	return true;
 }
+EXPORT_SYMBOL_GPL(vli_is_zero);
 
 /* Returns nonzero if bit bit of vli is set. */
-static u64 vli_test_bit(const u64 *vli, unsigned int bit)
+u64 vli_test_bit(const u64 *vli, unsigned int bit)
 {
 	return (vli[bit / 64] & ((u64)1 << (bit % 64)));
 }
+EXPORT_SYMBOL_GPL(vli_test_bit);
 
 /* Counts the number of 64-bit "digits" in vli. */
-static unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits)
+unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits)
 {
 	int i;
 
@@ -181,9 +187,10 @@ static unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits)
 
 	return (i + 1);
 }
+EXPORT_SYMBOL_GPL(vli_num_digits);
 
 /* Counts the number of bits required for vli. */
-static unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits)
+unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits)
 {
 	unsigned int i, num_digits;
 	u64 digit;
@@ -198,15 +205,17 @@ static unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits)
 
 	return ((num_digits - 1) * 64 + i);
 }
+EXPORT_SYMBOL_GPL(vli_num_bits);
 
 /* Sets dest = src. */
-static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
+void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
 {
 	int i;
 
 	for (i = 0; i < ndigits; i++)
 		dest[i] = src[i];
 }
+EXPORT_SYMBOL_GPL(vli_set);
 
 /* Copy from vli to buf.
  * For buffers smaller than vli: copy only LSB nbytes from vli.
@@ -225,6 +234,7 @@ void vli_copy_to_buf(u8 *dst_buf, unsigned int buf_len,
 	for (; i < buf_len; i++)
 		dst_buf[i] = 0;
 }
+EXPORT_SYMBOL_GPL(vli_copy_to_buf);
 
 /* Copy from buffer to vli.
  * For buffers smaller than vli: fill up remaining vli with zeroes.
@@ -243,9 +253,10 @@ void vli_copy_from_buf(u64 *dst_vli, unsigned int ndigits,
 	for (; i < nbytes; i++)
 		vli[i] = 0;
 }
+EXPORT_SYMBOL_GPL(vli_copy_from_buf);
 
 /* Returns sign of left - right. */
-static int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
+int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
 {
 	int i;
 
@@ -258,12 +269,13 @@ static int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(vli_cmp);
 
 /* Computes result = in << c, returning carry. Can modify in place
  * (if result == in). 0 < shift < 64.
  */
-static u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
-		      unsigned int ndigits)
+u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
+	       unsigned int ndigits)
 {
 	u64 carry = 0;
 	int i;
@@ -277,9 +289,10 @@ static u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
 
 	return carry;
 }
+EXPORT_SYMBOL_GPL(vli_lshift);
 
 /* Computes vli = vli >> 1. */
-static void vli_rshift1(u64 *vli, unsigned int ndigits)
+void vli_rshift1(u64 *vli, unsigned int ndigits)
 {
 	u64 *end = vli;
 	u64 carry = 0;
@@ -292,10 +305,11 @@ static void vli_rshift1(u64 *vli, unsigned int ndigits)
 		carry = temp << 63;
 	}
 }
+EXPORT_SYMBOL_GPL(vli_rshift1);
 
 /* Computes result = left + right, returning carry. Can modify in place. */
-static u64 vli_add(u64 *result, const u64 *left, const u64 *right,
-		   unsigned int ndigits)
+u64 vli_add(u64 *result, const u64 *left, const u64 *right,
+	    unsigned int ndigits)
 {
 	u64 carry = 0;
 	int i;
@@ -312,10 +326,11 @@ static u64 vli_add(u64 *result, const u64 *left, const u64 *right,
 
 	return carry;
 }
+EXPORT_SYMBOL_GPL(vli_add);
 
 /* Computes result = left - right, returning borrow. Can modify in place. */
-static u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
-		   unsigned int ndigits)
+u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
+	    unsigned int ndigits)
 {
 	u64 borrow = 0;
 	int i;
@@ -332,6 +347,7 @@ static u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
 
 	return borrow;
 }
+EXPORT_SYMBOL_GPL(vli_sub);
 
 static uint128_t mul_64_64(u64 left, u64 right)
 {
@@ -368,8 +384,8 @@ static uint128_t add_128_128(uint128_t a, uint128_t b)
 	return result;
 }
 
-static void vli_mult(u64 *result, const u64 *left, const u64 *right,
-		     unsigned int ndigits)
+void vli_mult(u64 *result, const u64 *left, const u64 *right,
+	      unsigned int ndigits)
 {
 	uint128_t r01 = { 0, 0 };
 	u64 r2 = 0;
@@ -403,8 +419,9 @@ static void vli_mult(u64 *result, const u64 *left, const u64 *right,
 
 	result[ndigits * 2 - 1] = r01.m_low;
 }
+EXPORT_SYMBOL_GPL(vli_mult);
 
-static void vli_square(u64 *result, const u64 *left, unsigned int ndigits)
+void vli_square(u64 *result, const u64 *left, unsigned int ndigits)
 {
 	uint128_t r01 = { 0, 0 };
 	u64 r2 = 0;
@@ -442,12 +459,13 @@ static void vli_square(u64 *result, const u64 *left, unsigned int ndigits)
 
 	result[ndigits * 2 - 1] = r01.m_low;
 }
+EXPORT_SYMBOL_GPL(vli_square);
 
 /* Computes result = (left + right) % mod.
  * Assumes that left < mod and right < mod, result != mod.
  */
-static void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
-			const u64 *mod, unsigned int ndigits)
+void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
+		 const u64 *mod, unsigned int ndigits)
 {
 	u64 carry;
 
@@ -459,12 +477,13 @@ static void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
 	if (carry || vli_cmp(result, mod, ndigits) >= 0)
 		vli_sub(result, result, mod, ndigits);
 }
+EXPORT_SYMBOL_GPL(vli_mod_add);
 
 /* Computes result = (left - right) % mod.
  * Assumes that left < mod and right < mod, result != mod.
  */
-static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
-			const u64 *mod, unsigned int ndigits)
+void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
+		 const u64 *mod, unsigned int ndigits)
 {
 	u64 borrow = vli_sub(result, left, right, ndigits);
 
@@ -475,6 +494,7 @@ static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
 	if (borrow)
 		vli_add(result, result, mod, ndigits);
 }
+EXPORT_SYMBOL_GPL(vli_mod_sub);
 
 /* Computes result = input % mod.
  * Assumes that input < mod, result != mod.
@@ -487,6 +507,7 @@ void vli_mod(u64 *result, const u64 *input, const u64 *mod,
 	else
 		vli_set(result, input, ndigits);
 }
+EXPORT_SYMBOL_GPL(vli_mod);
 
 /* Print vli in big-endian format.
  * The bytes are printed in hex.
@@ -507,6 +528,7 @@ void vli_print(char *vli_name, const u64 *vli, unsigned int ndigits)
 
 	pr_info("%20s(BigEnd)=%s\n", vli_name, buf);
 }
+EXPORT_SYMBOL_GPL(vli_print);
 
 /* Computes result = (left * right) % mod.
  * Assumes that left < mod and right < mod, result != mod.
@@ -552,6 +574,7 @@ void vli_mod_mult(u64 *result, const u64 *left, const u64 *right,
 			vli_sub(aa, aa, t1, ndigits);
 	}
 }
+EXPORT_SYMBOL_GPL(vli_mod_mult);
 
 /* Computes p_result = p_product % curve_p.
  * See algorithm 5 and 6 from
@@ -663,8 +686,8 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product,
 /* Computes result = product % curve_prime
  *  from http://www.nsa.gov/ia/_files/nist-routines.pdf
 */
-static bool vli_mmod_fast(u64 *result, u64 *product,
-			  const u64 *curve_prime, unsigned int ndigits)
+bool vli_mmod_fast(u64 *result, u64 *product,
+		   const u64 *curve_prime, unsigned int ndigits)
 {
 	u64 tmp[2 * ndigits];
 
@@ -682,34 +705,37 @@ static bool vli_mmod_fast(u64 *result, u64 *product,
 
 	return true;
 }
+EXPORT_SYMBOL_GPL(vli_mmod_fast);
 
 /* Computes result = (left * right) % curve_prime. */
-static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
-			      const u64 *curve_prime, unsigned int ndigits)
+void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
+		       const u64 *curve_prime, unsigned int ndigits)
 {
 	u64 product[2 * ndigits];
 
 	vli_mult(product, left, right, ndigits);
 	vli_mmod_fast(result, product, curve_prime, ndigits);
 }
+EXPORT_SYMBOL_GPL(vli_mod_mult_fast);
 
 /* Computes result = left^2 % curve_prime. */
-static void vli_mod_square_fast(u64 *result, const u64 *left,
-				const u64 *curve_prime, unsigned int ndigits)
+void vli_mod_square_fast(u64 *result, const u64 *left,
+			 const u64 *curve_prime, unsigned int ndigits)
 {
 	u64 product[2 * ndigits];
 
 	vli_square(product, left, ndigits);
 	vli_mmod_fast(result, product, curve_prime, ndigits);
 }
+EXPORT_SYMBOL_GPL(vli_mod_square_fast);
 
 #define EVEN(vli) (!(vli[0] & 1))
 /* Computes result = (1 / p_input) % mod. All VLIs are the same size.
  * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
  * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf
  */
-static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
-			unsigned int ndigits)
+void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
+		 unsigned int ndigits)
 {
 	u64 a[ndigits], b[ndigits];
 	u64 u[ndigits], v[ndigits];
@@ -781,23 +807,25 @@ static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
 
 	vli_set(result, u, ndigits);
 }
+EXPORT_SYMBOL_GPL(vli_mod_inv);
 
 /* ------ Point operations ------ */
 
 /* Returns true if p_point is the point at infinity, false otherwise. */
-static bool ecc_point_is_zero(const struct ecc_point *point)
+bool ecc_point_is_zero(const struct ecc_point *point)
 {
 	return (vli_is_zero(point->x, point->ndigits) &&
 		vli_is_zero(point->y, point->ndigits));
 }
+EXPORT_SYMBOL_GPL(ecc_point_is_zero);
 
 /* Point multiplication algorithm using Montgomery's ladder with co-Z
  * coordinates. From http://eprint.iacr.org/2011/338.pdf
  */
 
 /* Double in place */
-static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
-				      u64 *curve_prime, unsigned int ndigits)
+void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
+			       u64 *curve_prime, unsigned int ndigits)
 {
 	/* t1 = x, t2 = y, t3 = z */
 	u64 t4[ndigits];
@@ -857,6 +885,7 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
 	vli_set(z1, y1, ndigits);
 	vli_set(y1, t4, ndigits);
 }
+EXPORT_SYMBOL_GPL(ecc_point_double_jacobian);
 
 /* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
 static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime,
@@ -1045,11 +1074,12 @@ void ecc_point_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
 	vli_mod_mult_fast(t5, t5, t7,  curve_prime, ndigits);
 	vli_set(x2, t5, ndigits);
 }
+EXPORT_SYMBOL_GPL(ecc_point_add);
 
-static void ecc_point_mult(struct ecc_point *result,
-			   const struct ecc_point *point, const u64 *scalar,
-			   u64 *initial_z, u64 *curve_prime,
-			   unsigned int ndigits)
+void ecc_point_mult(struct ecc_point *result,
+		    const struct ecc_point *point, const u64 *scalar,
+		    u64 *initial_z, u64 *curve_prime,
+		    unsigned int ndigits)
 {
 	/* R0 and R1 */
 	u64 rx[2][ndigits];
@@ -1100,15 +1130,16 @@ static void ecc_point_mult(struct ecc_point *result,
 	vli_set(result->x, rx[0], ndigits);
 	vli_set(result->y, ry[0], ndigits);
 }
+EXPORT_SYMBOL_GPL(ecc_point_mult);
 
-static inline void ecc_swap_digits(const u64 *in, u64 *out,
-				   unsigned int ndigits)
+void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits)
 {
 	int i;
 
 	for (i = 0; i < ndigits; i++)
 		out[i] = __swab64(in[ndigits - 1 - i]);
 }
+EXPORT_SYMBOL_GPL(ecc_swap_digits);
 
 int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
 		     const u8 *private_key, unsigned int private_key_len)
@@ -1133,3 +1164,4 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ecc_is_key_valid);
diff --git a/crypto/ecc.h b/crypto/ecc.h
index 5db82223d485..0f907a860d0b 100644
--- a/crypto/ecc.h
+++ b/crypto/ecc.h
@@ -31,6 +31,59 @@
 
 #include "ecc_curve_defs.h"
 
+const struct ecc_curve *ecc_get_curve(unsigned int curve_id);
+struct ecc_point *ecc_alloc_point(unsigned int ndigits);
+void ecc_free_point(struct ecc_point *p);
+
+void vli_clear(u64 *vli, unsigned int ndigits);
+bool vli_is_zero(const u64 *vli, unsigned int ndigits);
+unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits);
+unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits);
+void vli_set(u64 *dest, const u64 *src, unsigned int ndigits);
+void vli_copy_to_buf(u8 *dst_buf, unsigned int buf_len,
+		     const u64 *src_vli, unsigned int ndigits);
+void vli_copy_from_buf(u64 *dst_vli, unsigned int ndigits,
+		       const u8 *src_buf, unsigned int buf_len);
+int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits);
+u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
+	       unsigned int ndigits);
+void vli_rshift1(u64 *vli, unsigned int ndigits);
+u64 vli_add(u64 *result, const u64 *left, const u64 *right,
+	    unsigned int ndigits);
+u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
+	    unsigned int ndigits);
+void vli_mult(u64 *result, const u64 *left, const u64 *right,
+	      unsigned int ndigits);
+void vli_square(u64 *result, const u64 *left, unsigned int ndigits);
+void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
+		 const u64 *mod, unsigned int ndigits);
+void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
+		 const u64 *mod, unsigned int ndigits);
+void vli_mod(u64 *result, const u64 *input, const u64 *mod,
+	     unsigned int ndigits);
+void vli_print(char *vli_name, const u64 *vli, unsigned int ndigits);
+void vli_mod_mult(u64 *result, const u64 *left, const u64 *right,
+		  const u64 *mod, unsigned int ndigits);
+bool vli_mmod_fast(u64 *result, u64 *product,
+		   const u64 *curve_prime, unsigned int ndigits);
+void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
+		       const u64 *curve_prime, unsigned int ndigits);
+void vli_mod_square_fast(u64 *result, const u64 *left,
+			 const u64 *curve_prime, unsigned int ndigits);
+void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
+		 unsigned int ndigits);
+
+bool ecc_point_is_zero(const struct ecc_point *point);
+void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
+			       u64 *curve_prime, unsigned int ndigits);
+void ecc_point_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
+		   unsigned int ndigits);
+void ecc_point_mult(struct ecc_point *result,
+		    const struct ecc_point *point, const u64 *scalar,
+		    u64 *initial_z, u64 *curve_prime,
+		    unsigned int ndigits);
+void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits);
+
 /**
  * ecc_is_key_valid() - Validate a given ECDH private key
  *
-- 
1.7.6.3

^ permalink raw reply related

* [PATCH 0/6] Add support for ECDSA algorithm
From: Nitin Kumbhar @ 2017-01-20 11:35 UTC (permalink / raw)
  To: herbert, davem; +Cc: linux-crypto, Nitin Kumbhar

Hello,

This patch series adds support for Elliptic Curve Digital Signature
Algorithm (ECDSA). To reuse existing ECC functionality, which is
added as part of ECDH, it separates out ECC and ECDH so that
only ECC functionality is available for ECDSA even when ECDH is in
a disabled state.

Patch #1 restructures ECC and ECDH code such that ECC is not
dependent on ECDH config.

Patches #2 & #3 add vli and ecc functions which are required
for other Elliptic curve algorithms like ECDSA and ECIES.

Patch #4 adds support for ECDSA. This has been validated for P192
and P256 elliptic curves.

Patches #5 and #6 add ECDSA tests to validate ECDSA functionality
and measure ECDSA performance.

Nitin Kumbhar (6):
  crypto: ecc: separate out ecc and ecdh
  crypto: ecc: add vli and ecc ops
  crypto: ecc: export vli and ecc ops
  crypto: ecdsa: add ECDSA SW implementation
  crypto: testmgr: add ECDSA tests
  crypto: tcrypt: add ECDSA test modes

 crypto/Kconfig            |   14 ++
 crypto/Makefile           |    8 +-
 crypto/ecc.c              |  415 ++++++++++++++++++++++++++++-------------
 crypto/ecc.h              |   98 ++++++----
 crypto/ecc_curve_defs.h   |   51 +----
 crypto/ecc_ecdh.h         |   54 ++++++
 crypto/ecdh.c             |    4 +-
 crypto/ecdh_helper.c      |   94 ++++++++++
 crypto/ecdsa.c            |  331 +++++++++++++++++++++++++++++++++
 crypto/ecdsa_helper.c     |  116 ++++++++++++
 crypto/tcrypt.c           |  250 ++++++++++++++++++++++++-
 crypto/tcrypt.h           |  122 ++++++++++++
 crypto/testmgr.c          |  452 ++++++++++++++++++++++++++++++++++++++++++++-
 crypto/testmgr.h          |  140 ++++++++++++++
 include/crypto/akcipher.h |    5 +-
 include/crypto/ecc.h      |   24 +++
 include/crypto/ecdh.h     |   10 +-
 include/crypto/ecdsa.h    |   29 +++
 18 files changed, 1984 insertions(+), 233 deletions(-)
 create mode 100644 crypto/ecc_ecdh.h
 create mode 100644 crypto/ecdsa.c
 create mode 100644 crypto/ecdsa_helper.c
 create mode 100644 include/crypto/ecc.h
 create mode 100644 include/crypto/ecdsa.h

-- 
1.7.6.3

^ permalink raw reply

* [PATCH 5/6] crypto: testmgr: add ECDSA tests
From: Nitin Kumbhar @ 2017-01-20 11:36 UTC (permalink / raw)
  To: herbert, davem; +Cc: linux-crypto, Nitin Kumbhar
In-Reply-To: <1484912161-5932-1-git-send-email-nkumbhar@nvidia.com>

Update crypto test manager to include NIST ECDSA
test vectors and various ECDSA tests. These include
tests for ECDSA signing, ECDSA sign-verification,
ECDSA signing and verifying generated signatures and
invalidation of incorrect signatures.

Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
---
 crypto/testmgr.c |  452 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 crypto/testmgr.h |  140 +++++++++++++++++
 2 files changed, 589 insertions(+), 3 deletions(-)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 98eb09782db8..a1db28cbc32d 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2002 Jean-Francois Dive <jef@linuxbe.org>
  * Copyright (c) 2007 Nokia Siemens Networks
  * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2017 NVIDIA Corporation
  *
  * Updated RFC4106 AES-GCM testing.
  *    Authors: Aidan O'Mahony (aidan.o.mahony@intel.com)
@@ -2085,6 +2086,436 @@ static int alg_test_kpp(const struct alg_test_desc *desc, const char *driver,
 	return err;
 }
 
+static int do_test_ecdsa_verify(struct crypto_akcipher *tfm,
+			      struct akcipher_testvec *vec)
+{
+	struct akcipher_request *req = NULL;
+	u8 *r_str = NULL, *s_str = NULL;
+	u8 *m_str = NULL;
+	struct scatterlist src_tab[3], dst;
+	struct tcrypt_result result;
+	unsigned int outbuf_maxlen;
+	u8 *outbuf = NULL;
+	unsigned int nbytes;
+	int err;
+
+	/* Alloc akcipher request */
+	req = akcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	/* Set private key */
+	err = crypto_akcipher_set_pub_key(tfm, vec->key, vec->key_len);
+	if (err)
+		goto error;
+
+	/*
+	 * vec->c always contains k, R and S in that order. All are
+	 * of same size and are equal to n i.e. the order of
+	 * an elliptic curve.
+	 */
+	nbytes = vec->c_size / 3;
+
+	r_str = kzalloc(nbytes, GFP_KERNEL);
+	s_str = kzalloc(nbytes, GFP_KERNEL);
+	m_str = kzalloc(vec->m_size, GFP_KERNEL);
+	if (!r_str || !s_str || !m_str) {
+		err = -ENOMEM;
+		goto error;
+	}
+	memcpy(r_str, (u8 *)vec->c + nbytes, nbytes);
+	memcpy(s_str, (u8 *)vec->c + 2 * nbytes, nbytes);
+	memcpy(m_str, vec->m, vec->m_size);
+
+	outbuf_maxlen = crypto_akcipher_maxsize(tfm);
+	if (outbuf_maxlen < 0) {
+		err = outbuf_maxlen;
+		goto error;
+	}
+	outbuf = kzalloc(outbuf_maxlen, GFP_KERNEL);
+	if (!outbuf) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* Set src and dst buffers */
+	sg_init_table(src_tab, 3);
+	sg_set_buf(&src_tab[0], m_str, vec->m_size);
+	sg_set_buf(&src_tab[1], r_str, nbytes);
+	sg_set_buf(&src_tab[2], s_str, nbytes);
+	sg_init_one(&dst, outbuf, outbuf_maxlen);
+
+	akcipher_request_set_crypt(req, src_tab, &dst,
+				   vec->m_size + 2 * nbytes, outbuf_maxlen);
+
+	/* Set up result callback */
+	init_completion(&result.completion);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      tcrypt_complete, &result);
+
+	/* Run ecdsa verify operation on sig (r,s) */
+	err = wait_async_op(&result, crypto_akcipher_verify(req));
+	if (err) {
+		pr_err("alg: ecdsa: verify(rs) test failed. err %d\n", err);
+		goto error;
+	}
+error:
+	akcipher_request_free(req);
+	kfree(r_str);
+	kfree(s_str);
+	kfree(m_str);
+	kfree(outbuf);
+	return err;
+}
+
+static int do_test_ecdsa_invalid_verify(struct crypto_akcipher *tfm,
+					struct akcipher_testvec *vec)
+{
+	struct akcipher_request *req = NULL;
+	u8 *r_str = NULL, *s_str = NULL;
+	u8 *m_str = NULL;
+	struct scatterlist src_tab[3], dst;
+	struct tcrypt_result result;
+	unsigned int outbuf_maxlen;
+	u8 *outbuf = NULL;
+	unsigned int nbytes;
+	int err;
+
+	/* Alloc akcipher request */
+	req = akcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	/* Set private key */
+	err = crypto_akcipher_set_pub_key(tfm, vec->key, vec->key_len);
+	if (err)
+		goto error;
+
+	/*
+	 * vec->c always contains k, R and S in that order. All are
+	 * of same size and are equal to n i.e. the order of
+	 * an elliptic curve.
+	 */
+	nbytes = vec->c_size / 3;
+
+	r_str = kzalloc(nbytes, GFP_KERNEL);
+	s_str = kzalloc(nbytes, GFP_KERNEL);
+	m_str = kzalloc(vec->m_size, GFP_KERNEL);
+	if (!r_str || !s_str || !m_str) {
+		err = -ENOMEM;
+		goto error;
+	}
+	memcpy(r_str, (u8 *)vec->c + 1 * nbytes, nbytes);
+	memcpy(s_str, (u8 *)vec->c + 2 * nbytes, nbytes);
+	memcpy(m_str, vec->m, vec->m_size);
+
+	outbuf_maxlen = crypto_akcipher_maxsize(tfm);
+	if (outbuf_maxlen < 0) {
+		err = outbuf_maxlen;
+		goto error;
+	}
+	outbuf = kzalloc(outbuf_maxlen, GFP_KERNEL);
+	if (!outbuf) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* Set src and dst buffers */
+	sg_init_table(src_tab, 3);
+	/* Intentionally set m_size to 8 to have invalid hash */
+	sg_set_buf(&src_tab[0], m_str, 8);
+	sg_set_buf(&src_tab[1], r_str, nbytes);
+	sg_set_buf(&src_tab[2], s_str, nbytes);
+	sg_init_one(&dst, outbuf, outbuf_maxlen);
+
+	akcipher_request_set_crypt(req, src_tab, &dst,
+				   vec->m_size + 2 * nbytes, outbuf_maxlen);
+
+	/* Set up result callback */
+	init_completion(&result.completion);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      tcrypt_complete, &result);
+
+	/* Run ecdsa verify operation on sig (r,s) */
+	err = wait_async_op(&result, crypto_akcipher_verify(req));
+	if (err != -EBADMSG) {
+		pr_err("alg: ecdsa: invalid verify test failed. err %d\n", err);
+		goto error;
+	}
+	err = 0;
+error:
+	akcipher_request_free(req);
+	kfree(r_str);
+	kfree(s_str);
+	kfree(m_str);
+	kfree(outbuf);
+	return err;
+}
+
+static int do_test_ecdsa_sign_verify(struct crypto_akcipher *tfm,
+				     struct akcipher_testvec *vec)
+{
+	struct akcipher_request *req = NULL;
+	u8 *r_str = NULL, *s_str = NULL;
+	u8 *m_str = NULL;
+	struct scatterlist src_tab[3];
+	struct scatterlist src, dst;
+	struct tcrypt_result result;
+	unsigned int outbuf_maxlen;
+	void *outbuf = NULL;
+	unsigned int nbytes;
+	int err;
+
+	/* Alloc akcipher request */
+	req = akcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	/* Set private key */
+	err = crypto_akcipher_set_priv_key(tfm, vec->key, vec->key_len);
+	if (err)
+		goto error;
+
+	/* Set private key */
+	err = crypto_akcipher_set_pub_key(tfm, vec->key, vec->key_len);
+	if (err)
+		goto error;
+
+	/*
+	 * vec->c always contains k, R and S in that order. All are
+	 * of same size and are equal to n i.e. the order of
+	 * an elliptic curve.
+	 */
+	nbytes = vec->c_size / 3;
+
+	m_str = kzalloc(vec->m_size, GFP_KERNEL);
+	if (!m_str) {
+		err = -ENOMEM;
+		goto error;
+	}
+	memcpy(m_str, vec->m, vec->m_size);
+
+	outbuf_maxlen = crypto_akcipher_maxsize(tfm);
+	if (outbuf_maxlen < 0) {
+		err = outbuf_maxlen;
+		goto error;
+	}
+	outbuf = kzalloc(outbuf_maxlen, GFP_KERNEL);
+	if (!outbuf) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	sg_init_one(&src, m_str, vec->m_size);
+	sg_init_one(&dst, outbuf, outbuf_maxlen);
+
+	akcipher_request_set_crypt(req, &src, &dst,
+				   vec->m_size, outbuf_maxlen);
+
+	/* Set up result callback */
+	init_completion(&result.completion);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      tcrypt_complete, &result);
+
+	/* Run ecdsa sign operation on message digest */
+	err = wait_async_op(&result, crypto_akcipher_sign(req));
+	if (err) {
+		pr_err("alg: ecdsa: sign test failed. err %d\n", err);
+		goto error;
+	}
+
+	/* verify that signature (r,s) is valid */
+	if (req->dst_len != 2 * nbytes) {
+		pr_err("alg: ecdsa: sign test failed. Invalid sig len\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	/* output contains r and s */
+	r_str = outbuf;
+	s_str = (u8 *)outbuf + nbytes;
+
+	/* Set src and dst buffers */
+	sg_init_table(src_tab, 3);
+	sg_set_buf(&src_tab[0], m_str, vec->m_size);
+	sg_set_buf(&src_tab[1], r_str, nbytes);
+	sg_set_buf(&src_tab[2], s_str, nbytes);
+	sg_init_one(&dst, outbuf, outbuf_maxlen);
+
+	akcipher_request_set_crypt(req, src_tab, &dst,
+				   vec->m_size + 2 * nbytes, outbuf_maxlen);
+
+	/* Set up result callback */
+	init_completion(&result.completion);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      tcrypt_complete, &result);
+
+	/* Run ecdsa verify operation on sig (r,s) */
+	err = wait_async_op(&result, crypto_akcipher_verify(req));
+	if (err) {
+		pr_err("alg: ecdsa: verify test failed. err %d\n", err);
+		goto error;
+	}
+error:
+	akcipher_request_free(req);
+	kfree(m_str);
+	kfree(outbuf);
+	return err;
+}
+
+static int do_test_ecdsa_sign(struct crypto_akcipher *tfm,
+			      struct akcipher_testvec *vec)
+{
+	struct akcipher_request *req = NULL;
+	u8 *r_str = NULL, *s_str = NULL;
+	u8 *k_str = NULL, *m_str = NULL;
+	struct scatterlist src, dst;
+	struct tcrypt_result result;
+	unsigned int outbuf_maxlen;
+	void *outbuf = NULL;
+	unsigned int nbytes;
+	int err;
+
+	/* Alloc akcipher request */
+	req = akcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	/* Set private key */
+	err = crypto_akcipher_set_priv_key(tfm, vec->key, vec->key_len);
+	if (err)
+		goto error;
+
+	/*
+	 * vec->c always contains k, R and S in that order. All are
+	 * of same size and are equal to n i.e. the order of
+	 * an elliptic curve.
+	 */
+	nbytes = vec->c_size / 3;
+
+	k_str = kzalloc(nbytes, GFP_KERNEL);
+	r_str = kzalloc(nbytes, GFP_KERNEL);
+	s_str = kzalloc(nbytes, GFP_KERNEL);
+	m_str = kzalloc(vec->m_size, GFP_KERNEL);
+	if (!k_str || !r_str || !s_str || !m_str) {
+		err = -ENOMEM;
+		goto error;
+	}
+	memcpy(k_str, (u8 *)vec->c + 0 * nbytes, nbytes);
+	memcpy(r_str, (u8 *)vec->c + 1 * nbytes, nbytes);
+	memcpy(s_str, (u8 *)vec->c + 2 * nbytes, nbytes);
+	memcpy(m_str, vec->m, vec->m_size);
+
+	outbuf_maxlen = crypto_akcipher_maxsize(tfm);
+	if (outbuf_maxlen < 0) {
+		err = outbuf_maxlen;
+		goto error;
+	}
+	outbuf = kzalloc(outbuf_maxlen, GFP_KERNEL);
+	if (!outbuf) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* Set src and dst buffers */
+	sg_init_one(&src, m_str, vec->m_size);
+	sg_init_one(&dst, outbuf, outbuf_maxlen);
+
+	akcipher_request_set_crypt(req, &src, &dst,
+				   vec->m_size, outbuf_maxlen);
+
+	/* Set up result callback */
+	init_completion(&result.completion);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				      tcrypt_complete, &result);
+
+	/* Set K in request for signing */
+	req->info = k_str;
+
+	/* Run ecdsa sign operation on message digest */
+	err = wait_async_op(&result, crypto_akcipher_sign(req));
+	if (err) {
+		pr_err("alg: ecdsa: sign(k) test failed. err %d\n", err);
+		goto error;
+	}
+
+	/* verify that signature (r,s) is valid */
+	if (req->dst_len != 2 * nbytes) {
+		pr_err("alg: ecdsa: sign(k) test failed. Invalid sig len\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	if (memcmp(r_str, sg_virt(req->dst), nbytes)) {
+		pr_err("alg: ecdsa: sign(k) test failed. Invalid sig(r)\n");
+		err = -EINVAL;
+		goto error;
+	}
+
+	if (memcmp(s_str, (u8 *)sg_virt(req->dst) + nbytes, nbytes)) {
+		pr_err("alg: ecdsa: sign(k) test failed. Invalid sig(s)\n");
+		err = -EINVAL;
+		goto error;
+	}
+error:
+	akcipher_request_free(req);
+	kfree(k_str);
+	kfree(r_str);
+	kfree(s_str);
+	kfree(m_str);
+	kfree(outbuf);
+	return err;
+}
+
+static int test_ecdsa_akcipher(struct crypto_akcipher *tfm, const char *alg,
+		       struct akcipher_testvec *vecs, unsigned int tcount)
+{
+	int i, err = 0;
+
+	for (i = 0; i < tcount; i++) {
+		err = do_test_ecdsa_verify(tfm, &vecs[i]);
+		if (!err)
+			continue;
+
+		pr_err("ecdsa: verify failed on vec %d, err=%d\n",
+		       i + 1, err);
+		goto exit;
+	}
+
+	for (i = 0; i < tcount; i++) {
+		err = do_test_ecdsa_invalid_verify(tfm, &vecs[i]);
+		if (!err)
+			continue;
+
+		pr_err("ecdsa: verify(invl) failed on vec %d, err=%d\n",
+		       i + 1, err);
+		goto exit;
+	}
+
+	for (i = 0; i < tcount; i++) {
+		err = do_test_ecdsa_sign_verify(tfm, &vecs[i]);
+		if (!err)
+			continue;
+
+		pr_err("ecdsa: sign/verify failed on vec %d, err=%d\n",
+		       i + 1, err);
+		goto exit;
+	}
+
+	for (i = 0; i < tcount; i++) {
+		err = do_test_ecdsa_sign(tfm, &vecs[i]);
+		if (!err)
+			continue;
+
+		pr_err("ecdsa: sign failed on vec %d, err=%d\n",
+		       i + 1, err);
+		goto exit;
+	}
+ exit:
+	pr_info("test_ecdsa: %s\n", err ? "FAILED" : "PASSED");
+	return err;
+}
+
 static int test_akcipher_one(struct crypto_akcipher *tfm,
 			     struct akcipher_testvec *vecs)
 {
@@ -2236,9 +2667,17 @@ static int alg_test_akcipher(const struct alg_test_desc *desc,
 		       driver, PTR_ERR(tfm));
 		return PTR_ERR(tfm);
 	}
-	if (desc->suite.akcipher.vecs)
-		err = test_akcipher(tfm, desc->alg, desc->suite.akcipher.vecs,
-				    desc->suite.akcipher.count);
+
+	if (desc->suite.akcipher.vecs) {
+		if (strncmp(desc->alg, "ecdsa", 5) == 0)
+			err = test_ecdsa_akcipher(tfm, desc->alg,
+						  desc->suite.akcipher.vecs,
+						  desc->suite.akcipher.count);
+		else
+			err = test_akcipher(tfm, desc->alg,
+					    desc->suite.akcipher.vecs,
+					    desc->suite.akcipher.count);
+	}
 
 	crypto_free_akcipher(tfm);
 	return err;
@@ -2982,6 +3421,13 @@ static int alg_test_null(const struct alg_test_desc *desc,
 			.kpp = __VECS(ecdh_tv_template)
 		}
 	}, {
+		.alg = "ecdsa",
+		.test = alg_test_akcipher,
+		.fips_allowed = 1,
+		.suite = {
+			.akcipher = __VECS(ecdsa_tv_template)
+		}
+	}, {
 		.alg = "gcm(aes)",
 		.test = alg_test_aead,
 		.fips_allowed = 1,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 64595f067d72..00bb57f4707a 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -5,6 +5,7 @@
  * Copyright (c) 2002 Jean-Francois Dive <jef@linuxbe.org>
  * Copyright (c) 2007 Nokia Siemens Networks
  * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2017 NVIDIA Corporation
  *
  * Updated RFC4106 AES-GCM testing. Some test vectors were taken from
  * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/
@@ -755,6 +756,145 @@ struct kpp_testvec dh_tv_template[] = {
 	}
 };
 
+/*
+ * ECDSA NIST test vectors from SigGenComponent.txt file from
+ * 186-3ecdsasiggencomponenttestvectors.zip for P-192 and P-256
+ * elliptic curves.
+ */
+static struct akcipher_testvec ecdsa_tv_template[] = {
+	{
+#ifndef CONFIG_CRYPTO_FIPS
+		/* [P-192,SHA-1] */
+		.m =
+		/* Msg / Hash */
+		"\x92\x5b\xd6\xf4\x1c\x55\xbe\x3e"
+		"\x49\xb7\x16\xe6\x1d\x42\x12\x3f"
+		"\x42\x79\x80\x60",
+		.m_size = 20,
+		.key =
+		/* version */
+		"\x01"
+		/* curve_id */
+		"\x01"
+		/* d */
+		"\xf3\xd7\x60\xd6\x75\xf2\xcc\xeb"
+		"\xf0\xd2\xfd\xb3\xb9\x41\x3f\xb0"
+		"\xf8\x4f\x37\xd1\xb3\x37\x4f\xe1"
+		/* Qx */
+		"\xe6\x98\xcf\x5b\x2d\x2d\x98\x94"
+		"\x4c\x49\xa2\x80\x6e\x09\x32\x64"
+		"\xe7\xdb\x08\x0b\xa4\x8e\x00\x07"
+		/* Qy */
+		"\x77\x54\xd6\xe4\xf2\xd7\x1b\xc4"
+		"\x98\x6d\xe2\x5d\x21\xba\x36\xa6"
+		"\x4e\x41\x0b\xd0\x81\xb6\xfa\x76",
+		.key_len = 74,
+		.c =
+		/* k */
+		"\x25\x5f\x68\x89\xa2\x31\xbc\x57"
+		"\x4d\x15\xc4\x12\xfb\x56\x45\x68"
+		"\x83\x07\xa1\x43\x70\xbc\x0a\xcb"
+		/* R */
+		"\x3e\xa6\x58\x62\xb4\x98\x96\x1a"
+		"\xf9\xf2\x5b\xec\x55\xf8\xdd\xff"
+		"\x93\xd7\xd0\xbd\x62\xd9\x94\x69"
+		/* S */
+		"\x41\x9f\x1a\x0e\xc0\x5f\xcf\x73"
+		"\x5b\x40\x21\x85\xbc\x02\xab\x44"
+		"\x37\x90\x34\xa2\x65\x64\xba\x02",
+		.c_size = 72,
+	}, {
+		/* [P-192,SHA-256] */
+		.m =
+		/* Msg / Hash */
+		"\xd0\xd8\xc0\x99\xe0\xe2\xf7\xf8"
+		"\x87\xe1\x6d\x11\xe1\xcc\x20\x43"
+		"\xaf\xc0\x80\xdb\x47\x72\xfa\xe3"
+		"\x95\xe5\xd1\x34\x7d\x31\xe8\x5a",
+		.m_size = 32,
+		.key =
+		/* version */
+		"\x01"
+		/* curve_id */
+		"\x01"
+		/* d */
+		"\x47\x7a\xf2\x5c\x86\xef\x09\x08"
+		"\xa4\x9a\x47\x53\x06\xfc\x61\xbc"
+		"\xa5\x6f\xdd\x7d\x2f\xd2\xed\x24"
+		/* Qx */
+		"\xdc\x14\xd4\xd8\x2e\x1e\x25\x2f"
+		"\x66\x28\xaa\x80\xbc\x38\x6a\x07"
+		"\x8a\x70\xb7\x74\x71\x2d\xf1\x9b"
+		/* Qy */
+		"\x98\x34\x57\x11\xb0\xdc\x3d\xff"
+		"\xfc\xdc\xfe\xa2\x1c\x47\x9e\x4e"
+		"\x82\x08\xfc\x7d\xd0\xc8\x54\x48",
+		.key_len = 74,
+		.c =
+		/* k */
+		"\x3e\x70\xc7\x86\xaf\xaa\x71\x7c"
+		"\x68\x96\xc5\xc3\xec\xb8\x29\xa3"
+		"\xfa\xf7\xa5\x36\xa2\x17\xc8\xa5"
+		/* R */
+		"\xf8\xef\x13\xa8\x86\xe6\x73\x85"
+		"\xdf\x2e\x88\x99\x91\x9b\xc2\x90"
+		"\xea\x1f\x36\xf4\xec\xba\x4a\x35"
+		/* S */
+		"\xc1\x82\x9e\x94\xb7\x58\x2c\x63"
+		"\x8e\xd7\x15\x5a\x38\x47\x30\x9b"
+		"\x1c\x11\x86\xac\x00\x00\xf5\x80",
+		.c_size = 72,
+	}, {
+#endif
+		/* [P-256,SHA-256] */
+		.m =
+		/* Msg / Hash */
+		"\x56\xec\x33\xa1\xa6\xe7\xc4\xdb"
+		"\x77\x03\x90\x1a\xfb\x2e\x1e\x4e"
+		"\x50\x09\xfe\x04\x72\x89\xc5\xc2"
+		"\x42\x13\x6c\xe3\xb7\xf6\xac\x44",
+		.m_size = 32,
+		.key =
+		/* version */
+		"\x01"
+		/* curve_id */
+		"\x02"
+		/* d */
+		"\x64\xb4\x72\xda\x6d\xa5\x54\xca"
+		"\xac\x3e\x4e\x0b\x13\xc8\x44\x5b"
+		"\x1a\x77\xf4\x59\xee\xa8\x4f\x1f"
+		"\x58\x8b\x5f\x71\x3d\x42\x9b\x51"
+		/* Qx */
+		"\x83\xbf\x71\xc2\x46\xff\x59\x3c"
+		"\x2f\xb1\xbf\x4b\xe9\x5d\x56\xd3"
+		"\xcc\x8f\xdb\x48\xa2\xbf\x33\xf0"
+		"\xf4\xc7\x5f\x07\x1c\xe9\xcb\x1c"
+		/* Qy */
+		"\xa9\x4c\x9a\xa8\x5c\xcd\x7c\xdc"
+		"\x78\x4e\x40\xb7\x93\xca\xb7\x6d"
+		"\xe0\x13\x61\x0e\x2c\xdb\x1f\x1a"
+		"\xa2\xf9\x11\x88\xc6\x14\x40\xce",
+		.key_len = 98,
+		.c =
+		/* k */
+		"\xde\x68\x2a\x64\x87\x07\x67\xb9"
+		"\x33\x5d\x4f\x82\x47\x62\x4a\x3b"
+		"\x7f\x3c\xe9\xf9\x45\xf2\x80\xa2"
+		"\x61\x6a\x90\x4b\xb1\xbb\xa1\x94"
+		/* R */
+		"\xac\xc2\xc8\x79\x6f\x5e\xbb\xca"
+		"\x7a\x5a\x55\x6a\x1f\x6b\xfd\x2a"
+		"\xed\x27\x95\x62\xd6\xe3\x43\x88"
+		"\x5b\x79\x14\xb5\x61\x80\xac\xf3"
+		/* S */
+		"\x03\x89\x05\xcc\x2a\xda\xcd\x3c"
+		"\x5a\x17\x6f\xe9\x18\xb2\x97\xef"
+		"\x1c\x37\xf7\x2b\x26\x76\x6c\x78"
+		"\xb2\xa6\x05\xca\x19\x78\xf7\x8b",
+		.c_size = 96,
+	},
+};
+
 struct kpp_testvec ecdh_tv_template[] = {
 	{
 #ifndef CONFIG_CRYPTO_FIPS
-- 
1.7.6.3

^ permalink raw reply related

* Re: [PATCH 4/6] crypto: ecdsa: add ECDSA SW implementation
From: Stephan Müller @ 2017-01-20 13:06 UTC (permalink / raw)
  To: Nitin Kumbhar; +Cc: herbert, davem, linux-crypto
In-Reply-To: <1484912161-5932-5-git-send-email-nkumbhar@nvidia.com>

Am Freitag, 20. Januar 2017, 17:05:59 CET schrieb Nitin Kumbhar:

Hi Nitin,

> This adds support for ECDSA algorithm. This implementation supports
> sign and verify functions for ECDSA algorithm using akcipher. As ECDSA
> is a signing algorithm dummy functions are added for encrypt() and
> decrypt().
> 
> Helper routines for parsing public and private ECC keys for ECDSA are
> added as well.
> 
> Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
> ---
>  crypto/Kconfig            |    7 +
>  crypto/Makefile           |    3 +
>  crypto/ecdsa.c            |  331
> +++++++++++++++++++++++++++++++++++++++++++++ crypto/ecdsa_helper.c     | 
> 116 ++++++++++++++++
>  include/crypto/akcipher.h |    5 +-
>  include/crypto/ecdsa.h    |   29 ++++
>  6 files changed, 490 insertions(+), 1 deletions(-)
>  create mode 100644 crypto/ecdsa.c
>  create mode 100644 crypto/ecdsa_helper.c
>  create mode 100644 include/crypto/ecdsa.h
> 
> diff --git a/crypto/Kconfig b/crypto/Kconfig
> index e240075d6f46..1c5c236b3bbc 100644
> --- a/crypto/Kconfig
> +++ b/crypto/Kconfig
> @@ -140,6 +140,13 @@ config CRYPTO_ECDH
>  	help
>  	  Generic implementation of the ECDH algorithm
> 
> +config CRYPTO_ECDSA
> +	tristate "ECDSA algorithm"
> +	select CRYPTO_AKCIPHER
> +	select CRYPTO_ECC
> +	help
> +	  Generic implementation of the ECDSA algorithm
> +
>  config CRYPTO_MANAGER
>  	tristate "Cryptographic algorithm manager"
>  	select CRYPTO_MANAGER2
> diff --git a/crypto/Makefile b/crypto/Makefile
> index 827740a47a37..9c13eb2ade6a 100644
> --- a/crypto/Makefile
> +++ b/crypto/Makefile
> @@ -38,6 +38,9 @@ obj-$(CONFIG_CRYPTO_ECC) += ecc.o
>  ecdh_generic-y := ecdh.o
>  ecdh_generic-y += ecdh_helper.o
>  obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o
> +ecdsa_generic-y := ecdsa.o
> +ecdsa_generic-y += ecdsa_helper.o
> +obj-$(CONFIG_CRYPTO_ECDSA) += ecdsa_generic.o
> 
>  $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h
>  $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h
> diff --git a/crypto/ecdsa.c b/crypto/ecdsa.c
> new file mode 100644
> index 000000000000..d415900af3bd
> --- /dev/null
> +++ b/crypto/ecdsa.c
> @@ -0,0 +1,331 @@
> +/*
> + * ECDSA generic algorithm
> + *
> + * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> Free + * Software Foundation; either version 2 of the License, or (at your
> option) + * any later version.
> + *
> + */
> +
> +#include <linux/module.h>
> +#include <linux/scatterlist.h>
> +#include <linux/random.h>
> +#include <crypto/internal/akcipher.h>
> +#include <crypto/akcipher.h>
> +#include <crypto/ecdsa.h>
> +
> +#include "ecc.h"
> +
> +struct ecdsa_ctx {
> +	unsigned int curve_id;
> +	unsigned int ndigits;
> +	u64 private_key[ECC_MAX_DIGITS];
> +	u64 public_key[2 * ECC_MAX_DIGITS];
> +};
> +
> +static inline struct ecdsa_ctx *ecdsa_get_ctx(struct crypto_akcipher *tfm)
> +{
> +	return akcipher_tfm_ctx(tfm);
> +}
> +
> +static void ecdsa_parse_msg_hash(struct akcipher_request *req, u64 *msg,
> +				 unsigned int ndigits)
> +{
> +	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +	unsigned int hash_len, hash_off;
> +	unsigned char *hash, *msg_ptr;
> +	int i;
> +
> +	/*
> +	 * If hash_len == nbytes:
> +	 *	copy nbytes from req
> +	 * If hash_len > nbytes:
> +	 *	copy left most nbytes from hash ignoring LSBs
> +	 * If hash_len < nbytes:
> +	 *	copy hash_len from req and zero remaining bytes
> +	 *	(nbytes - hash_len)
> +	 */
> +	hash_len = req->src[0].length;
> +	hash_off = hash_len <= nbytes ? 0 : hash_len - nbytes;
> +
> +	msg_ptr = (unsigned char *)msg;
> +	hash = sg_virt(&req->src[0]);
> +
> +	for (i = hash_off; i < hash_len; i++)
> +		*msg_ptr++ = hash[i];
> +	for (; i < nbytes; i++)
> +		*msg_ptr++ = 0;
> +}
> +
> +int ecdsa_sign(struct akcipher_request *req)
> +{
> +	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
> +	struct ecdsa_ctx *ctx = ecdsa_get_ctx(tfm);
> +	unsigned int ndigits = ctx->ndigits;
> +	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +	unsigned int curve_id = ctx->curve_id;
> +	const struct ecc_curve *curve = ecc_get_curve(curve_id);
> +	struct ecc_point *x1y1 = NULL;
> +	u64 z[ndigits], d[ndigits];
> +	u64 k[ndigits], k_inv[ndigits];
> +	u64 r[ndigits], s[ndigits];
> +	u64 dr[ndigits], zdr[ndigits];
> +	u8 *r_ptr, *s_ptr;
> +
> +	if (req->dst_len < 2 * nbytes) {
> +		req->dst_len = 2 * nbytes;
> +		return -EINVAL;
> +	}
> +
> +	ecdsa_parse_msg_hash(req, z, ndigits);
> +
> +	/* d */
> +	vli_set(d, (const u64 *)ctx->private_key, ndigits);
> +
> +	/* k */
> +#if defined(CONFIG_CRYPTO_MANAGER2)
> +	if (req->info) {
> +		vli_copy_from_buf(k, ndigits, req->info, nbytes);
> +	} else
> +#endif
> +		get_random_bytes(k, nbytes);

Please use crypto_get_default_rng /crypto_rng_get_bytes / 
crypto_put_default_rng here.

> +
> +	x1y1 = ecc_alloc_point(ndigits);
> +	if (!x1y1)
> +		return -ENOMEM;
> +
> +	/* (x1, y1) = k x G */
> +	ecc_point_mult(x1y1, &curve->g, k, NULL, curve->p, ndigits);
> +
> +	/* r = x1 mod n */
> +	vli_mod(r, x1y1->x, curve->n, ndigits);
> +
> +	/* k^-1 */
> +	vli_mod_inv(k_inv, k, curve->n, ndigits);
> +
> +	/* d . r mod n */
> +	vli_mod_mult(dr, d, r, curve->n, ndigits);
> +
> +	/* z + dr mod n */
> +	vli_mod_add(zdr, z, dr, curve->n, ndigits);
> +
> +	/* k^-1 . ( z + dr) mod n */
> +	vli_mod_mult(s, k_inv, zdr, curve->n, ndigits);
> +
> +	/* write signature (r,s) in dst */
> +	r_ptr = sg_virt(req->dst);
> +	s_ptr = (u8 *)sg_virt(req->dst) + nbytes;
> +
> +	vli_copy_to_buf(r_ptr, nbytes, r, ndigits);
> +	vli_copy_to_buf(s_ptr, nbytes, s, ndigits);
> +
> +	req->dst_len = 2 * nbytes;
> +
> +	ecc_free_point(x1y1);
> +	return 0;
> +}
> +
> +int ecdsa_verify(struct akcipher_request *req)
> +{
> +	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
> +	struct ecdsa_ctx *ctx = ecdsa_get_ctx(tfm);
> +	unsigned int ndigits = ctx->ndigits;
> +	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +	unsigned int curve_id = ctx->curve_id;
> +	const struct ecc_curve *curve = ecc_get_curve(curve_id);
> +	struct ecc_point *x1y1 = NULL, *x2y2 = NULL, *Q = NULL;
> +	u64 r[ndigits], s[ndigits], v[ndigits];
> +	u64 z[ndigits], w[ndigits];
> +	u64 u1[ndigits], u2[ndigits];
> +	u64 x1[ndigits], x2[ndigits];
> +	u64 y1[ndigits], y2[ndigits];
> +	u64 *ctx_qx, *ctx_qy;
> +	int ret;
> +
> +	x1y1 = ecc_alloc_point(ndigits);
> +	x2y2 = ecc_alloc_point(ndigits);
> +	Q = ecc_alloc_point(ndigits);
> +	if (!x1y1 || !x2y2 || !Q) {
> +		ret = -ENOMEM;
> +		goto exit;
> +	}
> +
> +	ecdsa_parse_msg_hash(req, z, ndigits);
> +
> +	/* Signature r,s */
> +	vli_copy_from_buf(r, ndigits, sg_virt(&req->src[1]), nbytes);
> +	vli_copy_from_buf(s, ndigits, sg_virt(&req->src[2]), nbytes);
> +
> +	/* w = s^-1 mod n */
> +	vli_mod_inv(w, s, curve->n, ndigits);
> +
> +	/* u1 = zw mod n */
> +	vli_mod_mult(u1, z, w, curve->n, ndigits);
> +
> +	/* u2 = rw mod n */
> +	vli_mod_mult(u2, r, w, curve->n, ndigits);
> +
> +	/* u1 . G */
> +	ecc_point_mult(x1y1, &curve->g, u1, NULL, curve->p, ndigits);
> +
> +	/* Q=(Qx,Qy) */
> +	ctx_qx = ctx->public_key;
> +	ctx_qy = ctx_qx + ECC_MAX_DIGITS;
> +	vli_set(Q->x, ctx_qx, ndigits);
> +	vli_set(Q->y, ctx_qy, ndigits);
> +
> +	/* u2 x Q */
> +	ecc_point_mult(x2y2, Q, u2, NULL, curve->p, ndigits);
> +
> +	vli_set(x1, x1y1->x, ndigits);
> +	vli_set(y1, x1y1->y, ndigits);
> +	vli_set(x2, x2y2->x, ndigits);
> +	vli_set(y2, x2y2->y, ndigits);
> +
> +	/* x1y1 + x2y2 => P + Q; P + Q in x2 y2 */
> +	ecc_point_add(x1, y1, x2, y2, curve->p, ndigits);
> +
> +	/* v = x mod n */
> +	vli_mod(v, x2, curve->n, ndigits);
> +
> +	/* validate signature */
> +	ret = vli_cmp(v, r, ndigits) == 0 ? 0 : -EBADMSG;
> + exit:
> +	ecc_free_point(x1y1);
> +	ecc_free_point(x2y2);
> +	ecc_free_point(Q);
> +	return ret;
> +}
> +
> +int ecdsa_dummy_enc(struct akcipher_request *req)
> +{
> +	return -EINVAL;
> +}
> +
> +int ecdsa_dummy_dec(struct akcipher_request *req)
> +{
> +	return -EINVAL;
> +}
> +
> +int ecdsa_set_pub_key(struct crypto_akcipher *tfm, const void *key,
> +		      unsigned int keylen)
> +{
> +	struct ecdsa_ctx *ctx = ecdsa_get_ctx(tfm);
> +	struct ecdsa params;
> +	unsigned int ndigits;
> +	unsigned int nbytes;
> +	u8 *params_qx, *params_qy;
> +	u64 *ctx_qx, *ctx_qy;
> +	int ret = 0;
> +
> +	if (crypto_ecdsa_parse_pub_key(key, keylen, &params))
> +		return -EINVAL;
> +
> +	ndigits = ecdsa_supported_curve(params.curve_id);
> +
> +	ctx->curve_id = params.curve_id;
> +	ctx->ndigits = ndigits;
> +	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +
> +	params_qx = params.key;
> +	params_qy = params_qx + ECC_MAX_DIGIT_BYTES;
> +
> +	ctx_qx = ctx->public_key;
> +	ctx_qy = ctx_qx + ECC_MAX_DIGITS;
> +
> +	vli_copy_from_buf(ctx_qx, ndigits, params_qx, nbytes);
> +	vli_copy_from_buf(ctx_qy, ndigits, params_qy, nbytes);
> +
> +	memset(&params, 0, sizeof(params));
> +	return ret;

Shouldn't there be a check that the point is on the curve? As I think the same 
issue is applicable to ECDH, I guess a common service function should be used 
that we can also invoke from ECDH.

> +}
> +
> +int ecdsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
> +		       unsigned int keylen)
> +{
> +	struct ecdsa_ctx *ctx = ecdsa_get_ctx(tfm);
> +	struct ecdsa params;
> +	unsigned int ndigits;
> +	unsigned int nbytes;
> +
> +	if (crypto_ecdsa_parse_priv_key(key, keylen, &params))
> +		return -EINVAL;
> +
> +	ndigits = ecdsa_supported_curve(params.curve_id);
> +
> +	ctx->curve_id = params.curve_id;
> +	ctx->ndigits = ndigits;
> +	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +
> +	if (ecc_is_key_valid(ctx->curve_id, ctx->ndigits,
> +			     (const u8 *)params.key, params.key_size) < 0)
> +		return -EINVAL;
> +
> +	vli_copy_from_buf(ctx->private_key, ndigits, params.key, nbytes);
> +
> +	memset(&params, 0, sizeof(params));
> +	return 0;
> +}
> +
> +int ecdsa_max_size(struct crypto_akcipher *tfm)
> +{
> +	struct ecdsa_ctx *ctx = ecdsa_get_ctx(tfm);
> +	int nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +
> +	/* For r,s */
> +	return 2 * nbytes;
> +}
> +
> +int ecdsa_init_tfm(struct crypto_akcipher *tfm)
> +{
> +	return 0;
> +}
> +
> +void ecdsa_exit_tfm(struct crypto_akcipher *tfm)
> +{
> +}
> +
> +static struct akcipher_alg ecdsa_alg = {
> +	.sign		= ecdsa_sign,
> +	.verify		= ecdsa_verify,
> +	.encrypt	= ecdsa_dummy_enc,
> +	.decrypt	= ecdsa_dummy_dec,
> +	.set_priv_key	= ecdsa_set_priv_key,
> +	.set_pub_key	= ecdsa_set_pub_key,
> +	.max_size	= ecdsa_max_size,
> +	.init		= ecdsa_init_tfm,
> +	.exit		= ecdsa_exit_tfm,
> +	.base = {
> +		.cra_name	= "ecdsa",
> +		.cra_driver_name = "ecdsa-generic",
> +		.cra_priority	= 100,
> +		.cra_module	= THIS_MODULE,
> +		.cra_ctxsize	= sizeof(struct ecdsa_ctx),
> +	},
> +};
> +
> +static int ecdsa_init(void)
> +{
> +	int ret;
> +
> +	ret = crypto_register_akcipher(&ecdsa_alg);
> +	if (ret)
> +		pr_err("ecdsa alg register failed. err:%d\n", ret);
> +	return ret;
> +}
> +
> +static void ecdsa_exit(void)
> +{
> +	crypto_unregister_akcipher(&ecdsa_alg);
> +}
> +
> +module_init(ecdsa_init);
> +module_exit(ecdsa_exit);
> +
> +MODULE_ALIAS_CRYPTO("ecdsa");
> +MODULE_LICENSE("GPL");
> +MODULE_DESCRIPTION("ECDSA Generic Algorithm");
> +MODULE_AUTHOR("NVIDIA Corporation");
> diff --git a/crypto/ecdsa_helper.c b/crypto/ecdsa_helper.c
> new file mode 100644
> index 000000000000..d31eb54431a9
> --- /dev/null
> +++ b/crypto/ecdsa_helper.c
> @@ -0,0 +1,116 @@
> +/*
> + * ECDSA helper routines
> + *
> + * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> Free + * Software Foundation; either version 2 of the License, or (at your
> option) + * any later version.
> + *
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/export.h>
> +#include <linux/err.h>
> +#include <linux/string.h>
> +#include <crypto/ecdsa.h>
> +
> +#include "ecc.h"
> +
> +#define ECDSA_KEY_MIN_SIZE	(1 + 1 + 24) /* ver + cid + n (P-192) */
> +
> +unsigned int ecdsa_supported_curve(unsigned int curve_id)
> +{
> +	switch (curve_id) {
> +	case ECC_CURVE_NIST_P192: return 3;
> +	case ECC_CURVE_NIST_P256: return 4;
> +	default: return 0;
> +	}
> +}

Wouldn't it be better to have a common function with ECDH as this has almost 
the same code in ecdh_supported_curve?

> +
> +static inline u8 *ecdsa_pack_data(void *dst, const void *src, size_t sz)
> +{
> +	memcpy(dst, src, sz);
> +	return dst + sz;
> +}
> +
> +static inline const u8 *ecdsa_unpack_data(void *dst, const void *src,
> size_t sz) +{
> +	memcpy(dst, src, sz);
> +	return src + sz;
> +}
> +
> +int crypto_ecdsa_parse_pub_key(const char *buf, unsigned int len,
> +			       struct ecdsa *params)
> +{
> +	unsigned char version;
> +	unsigned int ndigits;
> +	unsigned int nbytes;
> +	const u8 *ptr = buf;
> +	u8 *qx, *qy;
> +
> +	if (unlikely(!buf) || len < ECDSA_KEY_MIN_SIZE)
> +		return -EINVAL;
> +
> +	ptr = ecdsa_unpack_data(&version, ptr, sizeof(version));
> +	if (version != 1)
> +		return -EINVAL;
> +
> +	ptr = ecdsa_unpack_data(&params->curve_id, ptr,
> +				sizeof(params->curve_id));
> +
> +	ndigits = ecdsa_supported_curve(params->curve_id);
> +	if (!ndigits)
> +		return -EINVAL;
> +
> +	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +
> +	/* skip private key */
> +	ptr = ecdsa_unpack_data(&params->key, ptr, nbytes);
> +
> +	/* copy public key */
> +	qx = params->key;
> +	qy = qx + ECC_MAX_DIGIT_BYTES;
> +
> +	ptr = ecdsa_unpack_data(qx, ptr, nbytes);
> +	ptr = ecdsa_unpack_data(qy, ptr, nbytes);
> +
> +	params->key_size = 2 * nbytes;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(crypto_ecdsa_parse_pub_key);
> +
> +int crypto_ecdsa_parse_priv_key(const char *buf, unsigned int len,
> +				struct ecdsa *params)
> +{
> +	unsigned char version;
> +	unsigned int ndigits;
> +	unsigned int nbytes;
> +	const u8 *ptr = buf;
> +
> +	if (unlikely(!buf) || len < ECDSA_KEY_MIN_SIZE)
> +		return -EINVAL;
> +
> +	ptr = ecdsa_unpack_data(&version, ptr, sizeof(version));
> +	if (version != 1)
> +		return -EINVAL;
> +
> +	ptr = ecdsa_unpack_data(&params->curve_id, ptr,
> +				sizeof(params->curve_id));
> +
> +	ndigits = ecdsa_supported_curve(params->curve_id);
> +	if (!ndigits)
> +		return -EINVAL;
> +
> +	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +
> +	params->key_size = nbytes;
> +
> +	/* copy private key */
> +	ptr = ecdsa_unpack_data(&params->key, ptr, nbytes);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(crypto_ecdsa_parse_priv_key);
> diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h
> index c37cc59e9bf2..6b34e9043a6f 100644
> --- a/include/crypto/akcipher.h
> +++ b/include/crypto/akcipher.h
> @@ -3,6 +3,7 @@
>   *
>   * Copyright (c) 2015, Intel Corporation
>   * Authors: Tadeusz Struk <tadeusz.struk@intel.com>
> + * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
>   *
>   * This program is free software; you can redistribute it and/or modify it
>   * under the terms of the GNU General Public License as published by the
> Free @@ -27,6 +28,7 @@
>   *		result.
>   *		In case of error where the dst sgl size was insufficient,
>   *		it will be updated to the size required for the operation.
> + * @info:	Any request specific data needed to process the request.
>   * @__ctx:	Start of private context data
>   */
>  struct akcipher_request {
> @@ -35,6 +37,7 @@ struct akcipher_request {
>  	struct scatterlist *dst;
>  	unsigned int src_len;
>  	unsigned int dst_len;
> +	void *info;
>  	void *__ctx[] CRYPTO_MINALIGN_ATTR;
>  };
> 
> @@ -193,7 +196,7 @@ static inline void crypto_free_akcipher(struct
> crypto_akcipher *tfm) {
>  	struct akcipher_request *req;
> 
> -	req = kmalloc(sizeof(*req) + crypto_akcipher_reqsize(tfm), gfp);
> +	req = kzalloc(sizeof(*req) + crypto_akcipher_reqsize(tfm), gfp);
>  	if (likely(req))
>  		akcipher_request_set_tfm(req, tfm);
> 
> diff --git a/include/crypto/ecdsa.h b/include/crypto/ecdsa.h
> new file mode 100644
> index 000000000000..c71d21e654b9
> --- /dev/null
> +++ b/include/crypto/ecdsa.h
> @@ -0,0 +1,29 @@
> +/*
> + * ECC parameters for ECDSA
> + *
> + * Copyright (c) 2017, NVIDIA Corporation. All Rights Reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> Free + * Software Foundation; either version 2 of the License, or (at your
> option) + * any later version.
> + *
> + */
> +
> +#ifndef _CRYPTO_ECDSA_
> +#define _CRYPTO_ECDSA_
> +
> +#include <crypto/ecc.h>
> +
> +struct ecdsa {
> +	unsigned char curve_id;
> +	unsigned char key[2 * ECC_MAX_DIGIT_BYTES];
> +	unsigned short key_size;
> +};
> +
> +unsigned int ecdsa_supported_curve(unsigned int curve_id);
> +int crypto_ecdsa_parse_pub_key(const char *buf, unsigned int len,
> +			       struct ecdsa *params);
> +int crypto_ecdsa_parse_priv_key(const char *buf, unsigned int len,
> +				struct ecdsa *params);
> +#endif /* _CRYPTO_ECDSA_ */

I would like to see soem documentation here similar to the ecdh.h. Especially, 
can you please add documentation which type of point representation you expect 
as input (e.g. affine representation)?

Ciao
Stephan

^ permalink raw reply

* Re: [PATCH 5/6] crypto: testmgr: add ECDSA tests
From: Stephan Müller @ 2017-01-20 13:19 UTC (permalink / raw)
  To: Nitin Kumbhar; +Cc: herbert, davem, linux-crypto
In-Reply-To: <1484912161-5932-6-git-send-email-nkumbhar@nvidia.com>

Am Freitag, 20. Januar 2017, 17:06:00 CET schrieb Nitin Kumbhar:

Hi Nitin,

> Update crypto test manager to include NIST ECDSA
> test vectors and various ECDSA tests. These include
> tests for ECDSA signing, ECDSA sign-verification,
> ECDSA signing and verifying generated signatures and
> invalidation of incorrect signatures.
> 
> Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
> ---
>  crypto/testmgr.c |  452
> +++++++++++++++++++++++++++++++++++++++++++++++++++++- crypto/testmgr.h | 
> 140 +++++++++++++++++
>  2 files changed, 589 insertions(+), 3 deletions(-)
> 
> diff --git a/crypto/testmgr.c b/crypto/testmgr.c
> index 98eb09782db8..a1db28cbc32d 100644
> --- a/crypto/testmgr.c
> +++ b/crypto/testmgr.c
> @@ -5,6 +5,7 @@
>   * Copyright (c) 2002 Jean-Francois Dive <jef@linuxbe.org>
>   * Copyright (c) 2007 Nokia Siemens Networks
>   * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
> + * Copyright (c) 2017 NVIDIA Corporation
>   *
>   * Updated RFC4106 AES-GCM testing.
>   *    Authors: Aidan O'Mahony (aidan.o.mahony@intel.com)
> @@ -2085,6 +2086,436 @@ static int alg_test_kpp(const struct alg_test_desc
> *desc, const char *driver, return err;
>  }
> 
> +static int do_test_ecdsa_verify(struct crypto_akcipher *tfm,
> +			      struct akcipher_testvec *vec)
> +{
> +	struct akcipher_request *req = NULL;
> +	u8 *r_str = NULL, *s_str = NULL;
> +	u8 *m_str = NULL;
> +	struct scatterlist src_tab[3], dst;
> +	struct tcrypt_result result;
> +	unsigned int outbuf_maxlen;
> +	u8 *outbuf = NULL;
> +	unsigned int nbytes;
> +	int err;
> +
> +	/* Alloc akcipher request */
> +	req = akcipher_request_alloc(tfm, GFP_KERNEL);
> +	if (!req)
> +		return -ENOMEM;
> +
> +	/* Set private key */
> +	err = crypto_akcipher_set_pub_key(tfm, vec->key, vec->key_len);
> +	if (err)
> +		goto error;
> +
> +	/*
> +	 * vec->c always contains k, R and S in that order. All are
> +	 * of same size and are equal to n i.e. the order of
> +	 * an elliptic curve.
> +	 */
> +	nbytes = vec->c_size / 3;
> +
> +	r_str = kzalloc(nbytes, GFP_KERNEL);
> +	s_str = kzalloc(nbytes, GFP_KERNEL);
> +	m_str = kzalloc(vec->m_size, GFP_KERNEL);
> +	if (!r_str || !s_str || !m_str) {
> +		err = -ENOMEM;
> +		goto error;
> +	}
> +	memcpy(r_str, (u8 *)vec->c + nbytes, nbytes);
> +	memcpy(s_str, (u8 *)vec->c + 2 * nbytes, nbytes);
> +	memcpy(m_str, vec->m, vec->m_size);
> +
> +	outbuf_maxlen = crypto_akcipher_maxsize(tfm);
> +	if (outbuf_maxlen < 0) {
> +		err = outbuf_maxlen;
> +		goto error;
> +	}
> +	outbuf = kzalloc(outbuf_maxlen, GFP_KERNEL);
> +	if (!outbuf) {
> +		err = -ENOMEM;
> +		goto error;
> +	}
> +
> +	/* Set src and dst buffers */
> +	sg_init_table(src_tab, 3);
> +	sg_set_buf(&src_tab[0], m_str, vec->m_size);
> +	sg_set_buf(&src_tab[1], r_str, nbytes);
> +	sg_set_buf(&src_tab[2], s_str, nbytes);
> +	sg_init_one(&dst, outbuf, outbuf_maxlen);
> +
> +	akcipher_request_set_crypt(req, src_tab, &dst,
> +				   vec->m_size + 2 * nbytes, outbuf_maxlen);
> +
> +	/* Set up result callback */
> +	init_completion(&result.completion);
> +	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
> +				      tcrypt_complete, &result);
> +
> +	/* Run ecdsa verify operation on sig (r,s) */
> +	err = wait_async_op(&result, crypto_akcipher_verify(req));
> +	if (err) {
> +		pr_err("alg: ecdsa: verify(rs) test failed. err %d\n", err);
> +		goto error;
> +	}
> +error:
> +	akcipher_request_free(req);
> +	kfree(r_str);
> +	kfree(s_str);
> +	kfree(m_str);
> +	kfree(outbuf);
> +	return err;
> +}
> +
> +static int do_test_ecdsa_invalid_verify(struct crypto_akcipher *tfm,
> +					struct akcipher_testvec *vec)
> +{
> +	struct akcipher_request *req = NULL;
> +	u8 *r_str = NULL, *s_str = NULL;
> +	u8 *m_str = NULL;
> +	struct scatterlist src_tab[3], dst;
> +	struct tcrypt_result result;
> +	unsigned int outbuf_maxlen;
> +	u8 *outbuf = NULL;
> +	unsigned int nbytes;
> +	int err;
> +
> +	/* Alloc akcipher request */
> +	req = akcipher_request_alloc(tfm, GFP_KERNEL);
> +	if (!req)
> +		return -ENOMEM;
> +
> +	/* Set private key */
> +	err = crypto_akcipher_set_pub_key(tfm, vec->key, vec->key_len);
> +	if (err)
> +		goto error;
> +
> +	/*
> +	 * vec->c always contains k, R and S in that order. All are
> +	 * of same size and are equal to n i.e. the order of
> +	 * an elliptic curve.
> +	 */
> +	nbytes = vec->c_size / 3;
> +
> +	r_str = kzalloc(nbytes, GFP_KERNEL);
> +	s_str = kzalloc(nbytes, GFP_KERNEL);
> +	m_str = kzalloc(vec->m_size, GFP_KERNEL);
> +	if (!r_str || !s_str || !m_str) {
> +		err = -ENOMEM;
> +		goto error;
> +	}
> +	memcpy(r_str, (u8 *)vec->c + 1 * nbytes, nbytes);
> +	memcpy(s_str, (u8 *)vec->c + 2 * nbytes, nbytes);
> +	memcpy(m_str, vec->m, vec->m_size);
> +
> +	outbuf_maxlen = crypto_akcipher_maxsize(tfm);
> +	if (outbuf_maxlen < 0) {
> +		err = outbuf_maxlen;
> +		goto error;
> +	}
> +	outbuf = kzalloc(outbuf_maxlen, GFP_KERNEL);
> +	if (!outbuf) {
> +		err = -ENOMEM;
> +		goto error;
> +	}
> +
> +	/* Set src and dst buffers */
> +	sg_init_table(src_tab, 3);
> +	/* Intentionally set m_size to 8 to have invalid hash */
> +	sg_set_buf(&src_tab[0], m_str, 8);
> +	sg_set_buf(&src_tab[1], r_str, nbytes);
> +	sg_set_buf(&src_tab[2], s_str, nbytes);
> +	sg_init_one(&dst, outbuf, outbuf_maxlen);
> +
> +	akcipher_request_set_crypt(req, src_tab, &dst,
> +				   vec->m_size + 2 * nbytes, outbuf_maxlen);
> +
> +	/* Set up result callback */
> +	init_completion(&result.completion);
> +	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
> +				      tcrypt_complete, &result);
> +
> +	/* Run ecdsa verify operation on sig (r,s) */
> +	err = wait_async_op(&result, crypto_akcipher_verify(req));
> +	if (err != -EBADMSG) {
> +		pr_err("alg: ecdsa: invalid verify test failed. err %d\n", err);
> +		goto error;
> +	}
> +	err = 0;
> +error:
> +	akcipher_request_free(req);
> +	kfree(r_str);
> +	kfree(s_str);
> +	kfree(m_str);
> +	kfree(outbuf);
> +	return err;
> +}

There seems to be a lot of code duplication between 
do_test_ecdsa_invalid_verify and do_test_ecdsa_verify -- can this be 
eliminated?
> +
> +static int do_test_ecdsa_sign_verify(struct crypto_akcipher *tfm,
> +				     struct akcipher_testvec *vec)
> +{
> +	struct akcipher_request *req = NULL;
> +	u8 *r_str = NULL, *s_str = NULL;
> +	u8 *m_str = NULL;
> +	struct scatterlist src_tab[3];
> +	struct scatterlist src, dst;
> +	struct tcrypt_result result;
> +	unsigned int outbuf_maxlen;
> +	void *outbuf = NULL;
> +	unsigned int nbytes;
> +	int err;
> +
> +	/* Alloc akcipher request */
> +	req = akcipher_request_alloc(tfm, GFP_KERNEL);
> +	if (!req)
> +		return -ENOMEM;
> +
> +	/* Set private key */
> +	err = crypto_akcipher_set_priv_key(tfm, vec->key, vec->key_len);
> +	if (err)
> +		goto error;
> +
> +	/* Set private key */
> +	err = crypto_akcipher_set_pub_key(tfm, vec->key, vec->key_len);
> +	if (err)
> +		goto error;
> +
> +	/*
> +	 * vec->c always contains k, R and S in that order. All are
> +	 * of same size and are equal to n i.e. the order of
> +	 * an elliptic curve.
> +	 */
> +	nbytes = vec->c_size / 3;
> +
> +	m_str = kzalloc(vec->m_size, GFP_KERNEL);
> +	if (!m_str) {
> +		err = -ENOMEM;
> +		goto error;
> +	}
> +	memcpy(m_str, vec->m, vec->m_size);
> +
> +	outbuf_maxlen = crypto_akcipher_maxsize(tfm);
> +	if (outbuf_maxlen < 0) {
> +		err = outbuf_maxlen;
> +		goto error;
> +	}
> +	outbuf = kzalloc(outbuf_maxlen, GFP_KERNEL);
> +	if (!outbuf) {
> +		err = -ENOMEM;
> +		goto error;
> +	}
> +
> +	sg_init_one(&src, m_str, vec->m_size);
> +	sg_init_one(&dst, outbuf, outbuf_maxlen);
> +
> +	akcipher_request_set_crypt(req, &src, &dst,
> +				   vec->m_size, outbuf_maxlen);
> +
> +	/* Set up result callback */
> +	init_completion(&result.completion);
> +	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
> +				      tcrypt_complete, &result);
> +
> +	/* Run ecdsa sign operation on message digest */
> +	err = wait_async_op(&result, crypto_akcipher_sign(req));
> +	if (err) {
> +		pr_err("alg: ecdsa: sign test failed. err %d\n", err);
> +		goto error;
> +	}
> +
> +	/* verify that signature (r,s) is valid */
> +	if (req->dst_len != 2 * nbytes) {
> +		pr_err("alg: ecdsa: sign test failed. Invalid sig len\n");
> +		err = -EINVAL;
> +		goto error;
> +	}
> +
> +	/* output contains r and s */
> +	r_str = outbuf;
> +	s_str = (u8 *)outbuf + nbytes;
> +
> +	/* Set src and dst buffers */
> +	sg_init_table(src_tab, 3);
> +	sg_set_buf(&src_tab[0], m_str, vec->m_size);
> +	sg_set_buf(&src_tab[1], r_str, nbytes);
> +	sg_set_buf(&src_tab[2], s_str, nbytes);
> +	sg_init_one(&dst, outbuf, outbuf_maxlen);
> +
> +	akcipher_request_set_crypt(req, src_tab, &dst,
> +				   vec->m_size + 2 * nbytes, outbuf_maxlen);
> +
> +	/* Set up result callback */
> +	init_completion(&result.completion);
> +	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
> +				      tcrypt_complete, &result);
> +
> +	/* Run ecdsa verify operation on sig (r,s) */
> +	err = wait_async_op(&result, crypto_akcipher_verify(req));
> +	if (err) {
> +		pr_err("alg: ecdsa: verify test failed. err %d\n", err);
> +		goto error;
> +	}
> +error:
> +	akcipher_request_free(req);
> +	kfree(m_str);
> +	kfree(outbuf);
> +	return err;
> +}
> +
> +static int do_test_ecdsa_sign(struct crypto_akcipher *tfm,
> +			      struct akcipher_testvec *vec)
> +{
> +	struct akcipher_request *req = NULL;
> +	u8 *r_str = NULL, *s_str = NULL;
> +	u8 *k_str = NULL, *m_str = NULL;
> +	struct scatterlist src, dst;
> +	struct tcrypt_result result;
> +	unsigned int outbuf_maxlen;
> +	void *outbuf = NULL;
> +	unsigned int nbytes;
> +	int err;
> +
> +	/* Alloc akcipher request */
> +	req = akcipher_request_alloc(tfm, GFP_KERNEL);
> +	if (!req)
> +		return -ENOMEM;
> +
> +	/* Set private key */
> +	err = crypto_akcipher_set_priv_key(tfm, vec->key, vec->key_len);
> +	if (err)
> +		goto error;
> +
> +	/*
> +	 * vec->c always contains k, R and S in that order. All are
> +	 * of same size and are equal to n i.e. the order of
> +	 * an elliptic curve.
> +	 */
> +	nbytes = vec->c_size / 3;
> +
> +	k_str = kzalloc(nbytes, GFP_KERNEL);
> +	r_str = kzalloc(nbytes, GFP_KERNEL);
> +	s_str = kzalloc(nbytes, GFP_KERNEL);
> +	m_str = kzalloc(vec->m_size, GFP_KERNEL);
> +	if (!k_str || !r_str || !s_str || !m_str) {
> +		err = -ENOMEM;
> +		goto error;
> +	}
> +	memcpy(k_str, (u8 *)vec->c + 0 * nbytes, nbytes);
> +	memcpy(r_str, (u8 *)vec->c + 1 * nbytes, nbytes);
> +	memcpy(s_str, (u8 *)vec->c + 2 * nbytes, nbytes);
> +	memcpy(m_str, vec->m, vec->m_size);
> +
> +	outbuf_maxlen = crypto_akcipher_maxsize(tfm);
> +	if (outbuf_maxlen < 0) {
> +		err = outbuf_maxlen;
> +		goto error;
> +	}
> +	outbuf = kzalloc(outbuf_maxlen, GFP_KERNEL);
> +	if (!outbuf) {
> +		err = -ENOMEM;
> +		goto error;
> +	}
> +
> +	/* Set src and dst buffers */
> +	sg_init_one(&src, m_str, vec->m_size);
> +	sg_init_one(&dst, outbuf, outbuf_maxlen);
> +
> +	akcipher_request_set_crypt(req, &src, &dst,
> +				   vec->m_size, outbuf_maxlen);
> +
> +	/* Set up result callback */
> +	init_completion(&result.completion);
> +	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
> +				      tcrypt_complete, &result);
> +
> +	/* Set K in request for signing */
> +	req->info = k_str;
> +
> +	/* Run ecdsa sign operation on message digest */
> +	err = wait_async_op(&result, crypto_akcipher_sign(req));
> +	if (err) {
> +		pr_err("alg: ecdsa: sign(k) test failed. err %d\n", err);
> +		goto error;
> +	}
> +
> +	/* verify that signature (r,s) is valid */
> +	if (req->dst_len != 2 * nbytes) {
> +		pr_err("alg: ecdsa: sign(k) test failed. Invalid sig len\n");
> +		err = -EINVAL;
> +		goto error;
> +	}
> +
> +	if (memcmp(r_str, sg_virt(req->dst), nbytes)) {
> +		pr_err("alg: ecdsa: sign(k) test failed. Invalid sig(r)\n");
> +		err = -EINVAL;
> +		goto error;
> +	}
> +
> +	if (memcmp(s_str, (u8 *)sg_virt(req->dst) + nbytes, nbytes)) {
> +		pr_err("alg: ecdsa: sign(k) test failed. Invalid sig(s)\n");
> +		err = -EINVAL;
> +		goto error;
> +	}
> +error:
> +	akcipher_request_free(req);
> +	kfree(k_str);
> +	kfree(r_str);
> +	kfree(s_str);
> +	kfree(m_str);
> +	kfree(outbuf);
> +	return err;
> +}

Same here -- there seem to be a lot of code duplication -- can this be 
reduced?
> +
> +static int test_ecdsa_akcipher(struct crypto_akcipher *tfm, const char
> *alg, +		       struct akcipher_testvec *vecs, unsigned int tcount)
> +{
> +	int i, err = 0;
> +
> +	for (i = 0; i < tcount; i++) {
> +		err = do_test_ecdsa_verify(tfm, &vecs[i]);
> +		if (!err)
> +			continue;
> +
> +		pr_err("ecdsa: verify failed on vec %d, err=%d\n",
> +		       i + 1, err);

All of these pr_err logs here and below should be removed as these errors seem 
to be already logged.

> +		goto exit;
> +	}
> +
> +	for (i = 0; i < tcount; i++) {
> +		err = do_test_ecdsa_invalid_verify(tfm, &vecs[i]);
> +		if (!err)
> +			continue;
> +
> +		pr_err("ecdsa: verify(invl) failed on vec %d, err=%d\n",
> +		       i + 1, err);
> +		goto exit;
> +	}
> +
> +	for (i = 0; i < tcount; i++) {
> +		err = do_test_ecdsa_sign_verify(tfm, &vecs[i]);
> +		if (!err)
> +			continue;
> +
> +		pr_err("ecdsa: sign/verify failed on vec %d, err=%d\n",
> +		       i + 1, err);
> +		goto exit;
> +	}
> +
> +	for (i = 0; i < tcount; i++) {
> +		err = do_test_ecdsa_sign(tfm, &vecs[i]);
> +		if (!err)
> +			continue;
> +
> +		pr_err("ecdsa: sign failed on vec %d, err=%d\n",
> +		       i + 1, err);
> +		goto exit;
> +	}
> + exit:
> +	pr_info("test_ecdsa: %s\n", err ? "FAILED" : "PASSED");

This log message should go away.

> +	return err;
> +}
> +
>  static int test_akcipher_one(struct crypto_akcipher *tfm,
>  			     struct akcipher_testvec *vecs)
>  {
> @@ -2236,9 +2667,17 @@ static int alg_test_akcipher(const struct
> alg_test_desc *desc, driver, PTR_ERR(tfm));
>  		return PTR_ERR(tfm);
>  	}
> -	if (desc->suite.akcipher.vecs)
> -		err = test_akcipher(tfm, desc->alg, desc->suite.akcipher.vecs,
> -				    desc->suite.akcipher.count);
> +
> +	if (desc->suite.akcipher.vecs) {
> +		if (strncmp(desc->alg, "ecdsa", 5) == 0)
> +			err = test_ecdsa_akcipher(tfm, desc->alg,
> +						  desc->suite.akcipher.vecs,
> +						  desc->suite.akcipher.count);
> +		else
> +			err = test_akcipher(tfm, desc->alg,
> +					    desc->suite.akcipher.vecs,
> +					    desc->suite.akcipher.count);
> +	}
> 
>  	crypto_free_akcipher(tfm);
>  	return err;
> @@ -2982,6 +3421,13 @@ static int alg_test_null(const struct alg_test_desc
> *desc, .kpp = __VECS(ecdh_tv_template)
>  		}
>  	}, {
> +		.alg = "ecdsa",
> +		.test = alg_test_akcipher,
> +		.fips_allowed = 1,
> +		.suite = {
> +			.akcipher = __VECS(ecdsa_tv_template)
> +		}
> +	}, {



Ciao
Stephan

^ permalink raw reply

* Re: [PATCH 0/2] Introduce AMD Secure Processor device
From: Brijesh Singh @ 2017-01-20 15:40 UTC (permalink / raw)
  To: Greg KH
  Cc: brijesh.singh, thomas.lendacky, herbert, arnd, lambert.quentin,
	gary.hook, linux-kernel, Julia.Lawall, weiyongjun1, linux-crypto,
	umgwanakikbuti
In-Reply-To: <20170120084530.GA25333@kroah.com>


On 01/20/2017 02:45 AM, Greg KH wrote:
> On Thu, Jan 19, 2017 at 02:03:12PM -0600, Brijesh Singh wrote:
>> Hi Greg,
>>
>> On 01/19/2017 12:21 PM, Greg KH wrote:
>>> On Thu, Jan 19, 2017 at 01:07:50PM -0500, Brijesh Singh wrote:
>>>> The CCP device (drivers/crypto/ccp/ccp.ko) is part of AMD Secure Processor,
>>>> which is not dedicated solely to crypto. The AMD Secure Processor includes
>>>> CCP and PSP (Platform Secure Processor) devices.
>>>>
>>>> This patch series moves the CCP device driver to the misc directory and
>>>> creates a framework that allows functional component of the AMD Secure
>>>> Processor to be initialized and handled appropriately.
>>>
>>> Why the misc directory?  I don't see the justification here...
>>>
>>
>> Since this driver is not solely for crypto purposes and do not fit in any of
>> the standard categories hence I thought of moving it into misc directory. I
>> am open to other suggestions unless Herbert is ok with leaving it into
>> crypto and allowing the addition of the Secure Processor support.
>>
>> The patch series allows the CCP driver to support other Secure Processor
>> functions, e.g Secure Encrypted Virtualization (SEV) key management. In
>> past, I tried to add SEV support into existing CCP driver [1] but we quickly
>> learned that CCP driver should be moved outside the crypto directory
>> otherwise will end up adding non crypto code into drivers/crypto directory.
>> Once this cleanup is accepted then I can work to add SEV support inside the
>> CCP driver.
>>
>> [1] http://marc.info/?l=linux-kernel&m=147204118426151&w=2
>
> Ok, what type of interface will this driver have with userspace and/or
> other parts of the kernel?  Is there a misc char device burried in there
> somewhere (I couldn't find it in the big diff sent out), or is this
> driver just creating specific apis that other parts of the kernel will
> call if available?
>

Eventually, the driver will export functions which will be used by KVM
to encrypt the guest memory and more. Additionally, If SEV device is 
detected then driver will create a misc char device which can be used by 
userspace to import/export certificates etc.

I do realize that we need to get some more context on why this 
refactoring is needed and how it will benefit the SEV device 
integration. I will drop this patch for now, will include it as part of 
next SEV RFC patch series (which provides the complete context with 
examples).

thanks,

~ Brijesh

> I think we need to see more code here, broken up into sane and
> reviewable pieces, before we could either say "No don't do that!" or
> "sure, let me go merge these!" :)
>
> thanks,
>
> greg k-h
>

^ permalink raw reply

* Re: [PATCH 2/2] misc: amd-sp: introduce the AMD Secure Processor device
From: kbuild test robot @ 2017-01-20 16:20 UTC (permalink / raw)
  To: Brijesh Singh
  Cc: kbuild-all, thomas.lendacky, herbert, arnd, gregkh,
	lambert.quentin, gary.hook, linux-kernel, Julia.Lawall,
	weiyongjun1, linux-crypto, umgwanakikbuti, brijesh.singh
In-Reply-To: <148484929157.30852.14374439914599162367.stgit@brijesh-build-machine>

[-- Attachment #1: Type: text/plain, Size: 1152 bytes --]

Hi Brijesh,

[auto build test WARNING on cryptodev/master]
[also build test WARNING on next-20170120]
[cannot apply to char-misc/char-misc-testing v4.10-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Brijesh-Singh/Introduce-AMD-Secure-Processor-device/20170120-185157
base:   https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master
config: xtensa-allmodconfig (attached as .config)
compiler: xtensa-linux-gcc (GCC) 4.9.0
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=xtensa 

All warnings (new ones prefixed by >>):

warning: (CRYPTO_DEV_CCP_CRYPTO) selects AMD_SP which has unmet direct dependencies ((X86 && PCI || ARM64 && (OF_ADDRESS || ACPI)) && HAS_IOMEM)

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 48199 bytes --]

^ permalink raw reply

* Re: [PATCH 1/2] crypto: move CCP device driver to misc
From: kbuild test robot @ 2017-01-20 16:33 UTC (permalink / raw)
  To: Brijesh Singh
  Cc: kbuild-all, thomas.lendacky, herbert, arnd, gregkh,
	lambert.quentin, gary.hook, linux-kernel, Julia.Lawall,
	weiyongjun1, linux-crypto, umgwanakikbuti, brijesh.singh
In-Reply-To: <148484928147.30852.7911360215597694759.stgit@brijesh-build-machine>

[-- Attachment #1: Type: text/plain, Size: 9273 bytes --]

Hi Brijesh,

[auto build test ERROR on cryptodev/master]
[also build test ERROR on next-20170120]
[cannot apply to char-misc/char-misc-testing v4.10-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Brijesh-Singh/Introduce-AMD-Secure-Processor-device/20170120-185157
base:   https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master
config: um-allyesconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=um 

All errors (new ones prefixed by >>):

   arch/um/drivers/built-in.o: In function `vde_open_real':
   (.text+0xc9a1): warning: Using 'getgrnam' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `vde_open_real':
   (.text+0xc7ec): warning: Using 'getpwuid' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `vde_open_real':
   (.text+0xcb05): warning: Using 'getaddrinfo' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `pcap_nametoaddr':
   (.text+0x1d595): warning: Using 'gethostbyname' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `pcap_nametonetaddr':
   (.text+0x1d635): warning: Using 'getnetbyname' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `pcap_nametoproto':
   (.text+0x1d855): warning: Using 'getprotobyname' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `pcap_nametoport':
   (.text+0x1d687): warning: Using 'getservbyname' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   drivers/built-in.o: In function `ccp_init_dm_workarea':
>> drivers/misc/amd-sp/ccp-ops.c:141: undefined reference to `dma_pool_alloc'
   drivers/built-in.o: In function `ccp_init_dm_workarea':
   include/linux/dma-mapping.h:190: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp_sg_free':
   include/linux/dma-mapping.h:242: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp_dm_free':
>> drivers/misc/amd-sp/ccp-ops.c:112: undefined reference to `dma_pool_free'
   drivers/built-in.o: In function `ccp_dm_free':
   include/linux/dma-mapping.h:207: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp_init_data':
   include/linux/dma-mapping.h:227: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp_destroy':
>> drivers/misc/amd-sp/ccp-dev-v3.c:501: undefined reference to `dma_pool_destroy'
   drivers/built-in.o: In function `ccp_init':
>> drivers/misc/amd-sp/ccp-dev-v3.c:337: undefined reference to `dma_pool_create'
   drivers/misc/amd-sp/ccp-dev-v3.c:456: undefined reference to `dma_pool_destroy'
   drivers/built-in.o: In function `ccp5_destroy':
   include/linux/dma-mapping.h:484: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:490: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp5_init':
>> drivers/misc/amd-sp/ccp-dev-v5.c:676: undefined reference to `dma_pool_create'
   drivers/built-in.o: In function `ccp5_init':
   include/linux/dma-mapping.h:463: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp5_init':
>> drivers/misc/amd-sp/ccp-dev-v5.c:861: undefined reference to `dma_pool_destroy'
   drivers/built-in.o: In function `ccp_platform_probe':
   drivers/misc/amd-sp/ccp-platform.c:148: undefined reference to `devm_ioremap_resource'
   drivers/built-in.o: In function `ccp_platform_probe':
   include/linux/dma-mapping.h:555: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:542: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:544: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:542: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:544: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp_cmd_callback':
   drivers/misc/amd-sp/ccp-dmaengine.c:204: undefined reference to `dma_run_dependencies'
   drivers/built-in.o: In function `ccp_alloc_dma_desc':
   drivers/misc/amd-sp/ccp-dmaengine.c:306: undefined reference to `dma_async_tx_descriptor_init'
   drivers/built-in.o: In function `ccp_dmaengine_register':
   drivers/misc/amd-sp/ccp-dmaengine.c:705: undefined reference to `dma_async_device_register'
   drivers/built-in.o: In function `ccp_dmaengine_unregister':
   drivers/misc/amd-sp/ccp-dmaengine.c:724: undefined reference to `dma_async_device_unregister'
   drivers/built-in.o: In function `img_ascii_lcd_probe':
   drivers/auxdisplay/img-ascii-lcd.c:384: undefined reference to `devm_ioremap_resource'
   collect2: error: ld returned 1 exit status

vim +141 drivers/misc/amd-sp/ccp-ops.c

63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  106  }
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  107  
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  108  static void ccp_dm_free(struct ccp_dm_workarea *wa)
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  109  {
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  110  	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  111  		if (wa->address)
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12 @112  			dma_pool_free(wa->dma_pool, wa->address,
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  113  				      wa->dma.address);
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  114  	} else {
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  115  		if (wa->dma.address)
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  116  			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  117  					 wa->dma.dir);
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  118  		kfree(wa->address);
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  119  	}
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  120  
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  121  	wa->address = NULL;
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  122  	wa->dma.address = 0;
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  123  }
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  124  
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  125  static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  126  				struct ccp_cmd_queue *cmd_q,
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  127  				unsigned int len,
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  128  				enum dma_data_direction dir)
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  129  {
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  130  	memset(wa, 0, sizeof(*wa));
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  131  
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  132  	if (!len)
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  133  		return 0;
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  134  
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  135  	wa->dev = cmd_q->ccp->dev;
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  136  	wa->length = len;
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  137  
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  138  	if (len <= CCP_DMAPOOL_MAX_SIZE) {
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  139  		wa->dma_pool = cmd_q->dma_pool;
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  140  
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12 @141  		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  142  					     &wa->dma.address);
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  143  		if (!wa->address)
63b94509 drivers/crypto/ccp/ccp-ops.c Tom Lendacky 2013-11-12  144  			return -ENOMEM;

:::::: The code at line 141 was first introduced by commit
:::::: 63b945091a070d8d4275dc0f7699ba22cd5f9435 crypto: ccp - CCP device driver and interface support

:::::: TO: Tom Lendacky <thomas.lendacky@amd.com>
:::::: CC: Herbert Xu <herbert@gondor.apana.org.au>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 18805 bytes --]

^ permalink raw reply

* Re: [PATCH 00/13] crypto: copy AAD during encrypt for AEAD ciphers
From: Cyrille Pitchen @ 2017-01-20 17:07 UTC (permalink / raw)
  To: Herbert Xu, Stephan Müller; +Cc: linux-crypto
In-Reply-To: <20170113113906.GA24096@gondor.apana.org.au>

Hi all,

Le 13/01/2017 à 12:39, Herbert Xu a écrit :
> On Fri, Jan 13, 2017 at 12:36:56PM +0100, Stephan Müller wrote:
>>
>> I thought I understood that you would not want to see it in any 
>> implementation. But, ok, if you want to leave it.
> 
> If you remove it from authenc then authenc will be broken.
> 

Hence if the copy of the associated data is needed in the crypto/authenc.c
driver, then I should also keep this copy in the drivers/crypto/atmel-aes.c
for authenc(hmac(shaX),cbc-aes) algorithms [1], shouldn't I?

If so, should I keep the current not optimized implementation of
atmel_aes_authenc_copy_assoc() or try to use the code extracted by Stephan
from crypto/authenc.c using the null cipher as proposed in this thread?

As said earlier in this thread, copying the associated data is not a so big
deal when compared to the main crypto processing.

For instance, with IPSec ESP with AES in CBC mode, the associated data
layout should be:
Security Parameters Index (SPI): 4 bytes
Sequence Number: 4 bytes
AES IV: 16 bytes

So it's only a 24 byte copy.

I've taken Stephan's other comments into account from his review of the
atmel-authenc driver so I'm preparing a new series but I don't know what to
do for the associated data copy.

Please let me know what you recommend, thanks!

Best regards,

Cyrille


[1] https://lkml.org/lkml/2016/12/22/306

^ permalink raw reply

* Re: [PATCH 0/2] Introduce AMD Secure Processor device
From: Greg KH @ 2017-01-20 17:39 UTC (permalink / raw)
  To: Brijesh Singh
  Cc: thomas.lendacky, herbert, arnd, lambert.quentin, gary.hook,
	linux-kernel, Julia.Lawall, weiyongjun1, linux-crypto,
	umgwanakikbuti
In-Reply-To: <129bc948-6836-bf0f-832e-525f0805c549@amd.com>

On Fri, Jan 20, 2017 at 09:40:49AM -0600, Brijesh Singh wrote:
> 
> On 01/20/2017 02:45 AM, Greg KH wrote:
> > On Thu, Jan 19, 2017 at 02:03:12PM -0600, Brijesh Singh wrote:
> > > Hi Greg,
> > > 
> > > On 01/19/2017 12:21 PM, Greg KH wrote:
> > > > On Thu, Jan 19, 2017 at 01:07:50PM -0500, Brijesh Singh wrote:
> > > > > The CCP device (drivers/crypto/ccp/ccp.ko) is part of AMD Secure Processor,
> > > > > which is not dedicated solely to crypto. The AMD Secure Processor includes
> > > > > CCP and PSP (Platform Secure Processor) devices.
> > > > > 
> > > > > This patch series moves the CCP device driver to the misc directory and
> > > > > creates a framework that allows functional component of the AMD Secure
> > > > > Processor to be initialized and handled appropriately.
> > > > 
> > > > Why the misc directory?  I don't see the justification here...
> > > > 
> > > 
> > > Since this driver is not solely for crypto purposes and do not fit in any of
> > > the standard categories hence I thought of moving it into misc directory. I
> > > am open to other suggestions unless Herbert is ok with leaving it into
> > > crypto and allowing the addition of the Secure Processor support.
> > > 
> > > The patch series allows the CCP driver to support other Secure Processor
> > > functions, e.g Secure Encrypted Virtualization (SEV) key management. In
> > > past, I tried to add SEV support into existing CCP driver [1] but we quickly
> > > learned that CCP driver should be moved outside the crypto directory
> > > otherwise will end up adding non crypto code into drivers/crypto directory.
> > > Once this cleanup is accepted then I can work to add SEV support inside the
> > > CCP driver.
> > > 
> > > [1] http://marc.info/?l=linux-kernel&m=147204118426151&w=2
> > 
> > Ok, what type of interface will this driver have with userspace and/or
> > other parts of the kernel?  Is there a misc char device burried in there
> > somewhere (I couldn't find it in the big diff sent out), or is this
> > driver just creating specific apis that other parts of the kernel will
> > call if available?
> > 
> 
> Eventually, the driver will export functions which will be used by KVM
> to encrypt the guest memory and more. Additionally, If SEV device is
> detected then driver will create a misc char device which can be used by
> userspace to import/export certificates etc.

Why create a new api for certificates, why not just use the existing
kernel key handling for it?

Having a random char device for something like this is going to be rough
to approve, I'll wait for the patches before I start objecting really
hard :)

thanks,

greg k-h

^ permalink raw reply

* Re: [PATCH 2/2] misc: amd-sp: introduce the AMD Secure Processor device
From: kbuild test robot @ 2017-01-20 19:51 UTC (permalink / raw)
  To: Brijesh Singh
  Cc: kbuild-all, thomas.lendacky, herbert, arnd, gregkh,
	lambert.quentin, gary.hook, linux-kernel, Julia.Lawall,
	weiyongjun1, linux-crypto, umgwanakikbuti, brijesh.singh
In-Reply-To: <148484929157.30852.14374439914599162367.stgit@brijesh-build-machine>

[-- Attachment #1: Type: text/plain, Size: 10497 bytes --]

Hi Brijesh,

[auto build test ERROR on cryptodev/master]
[also build test ERROR on next-20170120]
[cannot apply to char-misc/char-misc-testing v4.10-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Brijesh-Singh/Introduce-AMD-Secure-Processor-device/20170120-185157
base:   https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master
config: um-allyesconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=um 

All error/warnings (new ones prefixed by >>):

warning: (AMD_CCP && SND_SOC_SH4_SIU) selects DMADEVICES which has unmet direct dependencies (HAS_DMA)
   arch/um/drivers/built-in.o: In function `vde_open_real':
   (.text+0xc9a1): warning: Using 'getgrnam' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `vde_open_real':
   (.text+0xc7ec): warning: Using 'getpwuid' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `vde_open_real':
   (.text+0xcb05): warning: Using 'getaddrinfo' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `pcap_nametoaddr':
   (.text+0x1d595): warning: Using 'gethostbyname' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `pcap_nametonetaddr':
   (.text+0x1d635): warning: Using 'getnetbyname' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `pcap_nametoproto':
   (.text+0x1d855): warning: Using 'getprotobyname' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   arch/um/drivers/built-in.o: In function `pcap_nametoport':
   (.text+0x1d687): warning: Using 'getservbyname' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking
   crypto/built-in.o: In function `async_memcpy':
   include/linux/dma-mapping.h:257: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:257: undefined reference to `bad_dma_ops'
   crypto/built-in.o: In function `async_xor':
   include/linux/dma-mapping.h:257: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:257: undefined reference to `bad_dma_ops'
   crypto/built-in.o: In function `async_xor_val':
   include/linux/dma-mapping.h:257: undefined reference to `bad_dma_ops'
   crypto/built-in.o:include/linux/dma-mapping.h:257: more undefined references to `bad_dma_ops' follow
   drivers/built-in.o: In function `dwc_desc_get':
>> include/linux/dmapool.h:30: undefined reference to `dma_pool_alloc'
   drivers/built-in.o: In function `dwc_desc_put':
>> drivers/dma/dw/core.c:137: undefined reference to `dma_pool_free'
   drivers/dma/dw/core.c:133: undefined reference to `dma_pool_free'
   drivers/built-in.o: In function `dw_dma_probe':
>> drivers/dma/dw/core.c:1508: undefined reference to `dmam_pool_create'
   drivers/built-in.o: In function `dw_probe':
>> drivers/dma/dw/platform.c:192: undefined reference to `devm_ioremap_resource'
   drivers/built-in.o: In function `dw_probe':
   include/linux/dma-mapping.h:555: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:542: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:544: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:542: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:544: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `fsl_edma_free_desc':
>> drivers/dma/fsl-edma.c:288: undefined reference to `dma_pool_free'
   drivers/built-in.o: In function `fsl_edma_alloc_desc':
>> drivers/dma/fsl-edma.c:525: undefined reference to `dma_pool_alloc'
   drivers/dma/fsl-edma.c:525: undefined reference to `dma_pool_free'
   drivers/built-in.o: In function `fsl_edma_free_chan_resources':
>> drivers/dma/fsl-edma.c:808: undefined reference to `dma_pool_destroy'
   drivers/built-in.o: In function `fsl_edma_alloc_chan_resources':
>> drivers/dma/fsl-edma.c:788: undefined reference to `dma_pool_create'
   drivers/built-in.o: In function `fsl_edma_probe':
>> drivers/dma/fsl-edma.c:898: undefined reference to `devm_ioremap_resource'
   drivers/dma/fsl-edma.c:906: undefined reference to `devm_ioremap_resource'
   drivers/built-in.o: In function `idma64_desc_free':
>> drivers/dma/idma64.c:214: undefined reference to `dma_pool_free'
   drivers/built-in.o: In function `idma64_free_chan_resources':
>> drivers/dma/idma64.c:520: undefined reference to `dma_pool_destroy'
   drivers/built-in.o: In function `idma64_alloc_chan_resources':
>> drivers/dma/idma64.c:504: undefined reference to `dma_pool_create'
   drivers/built-in.o: In function `idma64_platform_probe':
>> drivers/dma/idma64.c:644: undefined reference to `devm_ioremap_resource'
   drivers/built-in.o: In function `idma64_platform_probe':
   include/linux/dma-mapping.h:555: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:542: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:544: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:542: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:544: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `idma64_prep_slave_sg':
>> drivers/dma/idma64.c:315: undefined reference to `dma_pool_alloc'
   drivers/built-in.o: In function `hidma_mgmt_probe':
>> drivers/dma/qcom/hidma_mgmt.c:165: undefined reference to `devm_ioremap_resource'
   drivers/built-in.o: In function `hidma_ll_init':
>> drivers/dma/qcom/hidma_ll.c:749: undefined reference to `dmam_alloc_coherent'
   drivers/built-in.o: In function `hidma_probe':
>> drivers/dma/qcom/hidma.c:735: undefined reference to `devm_ioremap_resource'
   drivers/built-in.o: In function `hidma_probe':
   include/linux/dma-mapping.h:555: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:542: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:544: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:542: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:544: undefined reference to `bad_dma_ops'
   drivers/built-in.o:include/linux/dma-mapping.h:542: more undefined references to `bad_dma_ops' follow
   drivers/built-in.o: In function `sp_platform_probe':
>> drivers/misc/amd-sp/sp-platform.c:133: undefined reference to `devm_ioremap_resource'
   drivers/built-in.o: In function `sp_platform_probe':
   include/linux/dma-mapping.h:555: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:555: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:544: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:544: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp_dev_suspend':
>> include/linux/spinlock.h:362: undefined reference to `ccp_queues_suspended'
   drivers/built-in.o: In function `ccp_destroy':
   drivers/misc/amd-sp/ccp-dev-v3.c:534: undefined reference to `dma_pool_destroy'
   drivers/built-in.o: In function `ccp_init':
   drivers/misc/amd-sp/ccp-dev-v3.c:370: undefined reference to `dma_pool_create'
   drivers/misc/amd-sp/ccp-dev-v3.c:489: undefined reference to `dma_pool_destroy'
   drivers/built-in.o: In function `ccp5_destroy':
   include/linux/dma-mapping.h:484: undefined reference to `bad_dma_ops'
   include/linux/dma-mapping.h:490: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp5_init':
   drivers/misc/amd-sp/ccp-dev-v5.c:707: undefined reference to `dma_pool_create'
   drivers/built-in.o: In function `ccp5_init':
   include/linux/dma-mapping.h:463: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp5_init':
   drivers/misc/amd-sp/ccp-dev-v5.c:892: undefined reference to `dma_pool_destroy'
   drivers/built-in.o: In function `ccp_init_dm_workarea':
   drivers/misc/amd-sp/ccp-ops.c:141: undefined reference to `dma_pool_alloc'
   drivers/built-in.o: In function `ccp_init_dm_workarea':
   include/linux/dma-mapping.h:190: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp_sg_free':
   include/linux/dma-mapping.h:242: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp_dm_free':
   drivers/misc/amd-sp/ccp-ops.c:112: undefined reference to `dma_pool_free'
   drivers/built-in.o: In function `ccp_dm_free':
   include/linux/dma-mapping.h:207: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `ccp_init_data':
   include/linux/dma-mapping.h:227: undefined reference to `bad_dma_ops'
   drivers/built-in.o: In function `img_ascii_lcd_probe':
   drivers/auxdisplay/img-ascii-lcd.c:384: undefined reference to `devm_ioremap_resource'
   collect2: error: ld returned 1 exit status

vim +30 include/linux/dmapool.h

dd0fc66f Al Viro         2005-10-07  24  void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
3a11ec5e Victor Fusco    2005-09-10  25  		     dma_addr_t *handle);
^1da177e Linus Torvalds  2005-04-16  26  
ad82362b Sean O. Stalley 2015-09-08  27  static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags,
ad82362b Sean O. Stalley 2015-09-08  28  				    dma_addr_t *handle)
ad82362b Sean O. Stalley 2015-09-08  29  {
ad82362b Sean O. Stalley 2015-09-08 @30  	return dma_pool_alloc(pool, mem_flags | __GFP_ZERO, handle);
ad82362b Sean O. Stalley 2015-09-08  31  }
ad82362b Sean O. Stalley 2015-09-08  32  
^1da177e Linus Torvalds  2005-04-16  33  void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t addr);

:::::: The code at line 30 was first introduced by commit
:::::: ad82362b2defd4adad87d8538617b2f51a4bf9c3 mm: add dma_pool_zalloc() call to DMA API

:::::: TO: Sean O. Stalley <sean.stalley@intel.com>
:::::: CC: Linus Torvalds <torvalds@linux-foundation.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 18897 bytes --]

^ permalink raw reply

* Re: [PATCH 00/13] crypto: copy AAD during encrypt for AEAD ciphers
From: Stephan Müller @ 2017-01-20 20:28 UTC (permalink / raw)
  To: Cyrille Pitchen; +Cc: Herbert Xu, linux-crypto
In-Reply-To: <2a4fab23-3ea7-a0f7-7c30-55d301f51ec9@atmel.com>

Am Freitag, 20. Januar 2017, 18:07:04 CET schrieb Cyrille Pitchen:

Hi Cyrille,

> 
> I've taken Stephan's other comments into account from his review of the
> atmel-authenc driver so I'm preparing a new series but I don't know what to
> do for the associated data copy.
> 
> Please let me know what you recommend, thanks!

The question is where the null context shall be saved. As Herbert mentioned, 
he may not want to have it in crypto_aead.

Herbert, as this implementation also requires the copy, shall we leave the 
null context in crypto_aead? If so, Cyrille can simply use patch 01 from this 
series and apply the change to his code similar to what I did in patches 02 - 
13.

Ciao
Stephan

^ permalink raw reply

* [PATCH v3 0/4] Update LZ4 compressor module
From: Sven Schmidt @ 2017-01-21 15:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip
In-Reply-To: <1482259992-16680-1-git-send-email-4sschmid@informatik.uni-hamburg.de>


This patchset is for updating the LZ4 compression module to a version based
on LZ4 v1.7.3 allowing to use the fast compression algorithm aka LZ4 fast
which provides an "acceleration" parameter as a tradeoff between
high compression ratio and high compression speed.

We want to use LZ4 fast in order to support compression in lustre
and (mostly, based on that) investigate data reduction techniques in behalf of
storage systems.

Also, it will be useful for other users of LZ4 compression, as with LZ4 fast
it is possible to enable applications to use fast and/or high compression
depending on the usecase.
For instance, ZRAM is offering a LZ4 backend and could benefit from an updated
LZ4 in the kernel.

LZ4 homepage: http://www.lz4.org/
LZ4 source repository: https://github.com/lz4/lz4
Source version: 1.7.3

Benchmark (taken from [1], Core i5-4300U @1.9GHz):
----------------|--------------|----------------|----------
Compressor      | Compression  | Decompression  | Ratio
----------------|--------------|----------------|----------
memcpy          |  4200 MB/s   |  4200 MB/s     | 1.000
LZ4 fast 50     |  1080 MB/s   |  2650 MB/s     | 1.375
LZ4 fast 17     |   680 MB/s   |  2220 MB/s     | 1.607
LZ4 fast 5      |   475 MB/s   |  1920 MB/s     | 1.886
LZ4 default     |   385 MB/s   |  1850 MB/s     | 2.101

[1] http://fastcompression.blogspot.de/2015/04/sampling-or-faster-lz4.html

[PATCHv2 1/4] lib: Update LZ4  compressor module based on LZ4 v1.7.2
[PATCHv2 2/4] lib: Update decompress_unlz4 wrapper to work with new LZ4 module
[PATCHv2 3/4] crypto: Change lz4 modules to work with new LZ4 module
[PATCHv2 4/4] fs/pstore: fs/squashfs: Change LZ4 compressor functions to work with new LZ4 module

v2:
- Changed order of the patches since in the initial patchset the lz4.h was in the
  last patch but was referenced by the other ones
- Split lib/decompress_unlz4.c in an own patch
- Fixed errors reported by the buildbot
- Further refactorings
- Added more appropriate copyright note to include/linux/lz4.h

v3:
- Adjusted the code to satisfy kernel coding style (checkpatch.pl)
- Made sure the changes to LZ4 in Kernel (overflow checks etc.)
are included in the new module (they are)
- Removed the second LZ4_compressBound function with related name but
different return type
- Corrected version number (was LZ4 1.7.3)

^ permalink raw reply

* [PATCH 1/4] lib: Update LZ4 compressor module
From: Sven Schmidt @ 2017-01-21 15:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt
In-Reply-To: <1485011351-7463-1-git-send-email-4sschmid@informatik.uni-hamburg.de>

This patch updates LZ4 kernel module to LZ4 v1.7.3 by Yann Collet.
The kernel module is inspired by the previous work by Chanho Min.
The updated LZ4 module will not break existing code since there were alias
methods added to ensure backwards compatibility.

API changes:

New method LZ4_compress_fast which differs from the variant available
in kernel by the new acceleration parameter,
allowing to trade compression ratio for more compression speed
and vice versa.

LZ4_decompress_fast is the respective decompression method, featuring a very
fast decoder (multiple GB/s per core), able to reach RAM speed in multi-core
systems. The decompressor allows to decompress data compressed with
LZ4 fast as well as the LZ4 HC (high compression) algorithm.

Also the useful functions LZ4_decompress_safe_partial
LZ4_compress_destsize were added. The latter reverses the logic by trying to
compress as much data as possible from source to dest while the former aims
to decompress partial blocks of data.

A bunch of streaming functions were also added
which allow compressig/decompressing data in multiple steps
(so called "streaming mode").

The methods lz4_compress and lz4_decompress_unknownoutputsize
are now known as LZ4_compress_default respectivley LZ4_decompress_safe.
The old methods are still available for providing backwards compatibility.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 include/linux/lz4.h      |  581 +++++++++++++++++++++---
 lib/lz4/lz4_compress.c   | 1103 ++++++++++++++++++++++++++++++++--------------
 lib/lz4/lz4_decompress.c |  694 ++++++++++++++++++-----------
 lib/lz4/lz4defs.h        |  320 ++++++++------
 lib/lz4/lz4hc_compress.c |  855 ++++++++++++++++++++++-------------
 5 files changed, 2477 insertions(+), 1076 deletions(-)

diff --git a/include/linux/lz4.h b/include/linux/lz4.h
index 6b784c5..5bd21c1 100644
--- a/include/linux/lz4.h
+++ b/include/linux/lz4.h
@@ -1,87 +1,546 @@
-#ifndef __LZ4_H__
-#define __LZ4_H__
-/*
- * LZ4 Kernel Interface
+/* LZ4 Kernel Interface
  *
  * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
+ * This file is based on the original header file
+ * for LZ4 - Fast LZ compression algorithm.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *		* Redistributions of source code must retain the above copyright
+ *		  notice, this list of conditions and the following disclaimer.
+ *		* Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
+ */
+
+#ifndef __LZ4_H__
+#define __LZ4_H__
+
+#include <linux/types.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
+/*-************************************************************************
+ *	CONSTANTS
+ **************************************************************************/
+/*
+ * LZ4_MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes
+ * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
+ */
+#define LZ4_MEMORY_USAGE 10
+
+#define LZ4_MAX_INPUT_SIZE	0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)	(\
+	(unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
+	? 0 \
+	: (isize) + ((isize)/255) + 16)
+
+#define LZ4_ACCELERATION_DEFAULT 1
+#define LZ4_HASHLOG	 (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
+
+#define LZ4HC_MIN_CLEVEL				3
+#define LZ4HC_DEFAULT_CLEVEL		9
+#define LZ4HC_MAX_CLEVEL				16
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE-1)
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+/*-************************************************************************
+ *	STREAMING CONSTANTS AND STRUCTURES
+ **************************************************************************/
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE	(LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
+
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
+
+#define LZ4_STREAMDECODESIZE_U64	4
+#define LZ4_STREAMDECODESIZE		 (LZ4_STREAMDECODESIZE_U64 * \
+	sizeof(unsigned long long))
+
+/*
+ * LZ4_stream_t :
+ * information structure to track an LZ4 stream.
+ */
+typedef struct {
+		uint32_t hashTable[LZ4_HASH_SIZE_U32];
+		uint32_t currentOffset;
+		uint32_t initCheck;
+		const uint8_t *dictionary;
+		uint8_t *bufferStart;
+		uint32_t dictSize;
+} LZ4_stream_t_internal;
+typedef union {
+		unsigned long long table[LZ4_STREAMSIZE_U64];
+		LZ4_stream_t_internal internal_donotuse;
+} LZ4_stream_t;
+
+/*
+ * LZ4_streamHC_t :
+ * information structure to track an LZ4HC stream.
+ */
+typedef struct {
+		unsigned int	 hashTable[LZ4HC_HASHTABLESIZE];
+		unsigned short	 chainTable[LZ4HC_MAXD];
+		/* next block to continue on current prefix */
+		const unsigned char *end;
+		/* All index relative to this position */
+		const unsigned char *base;
+		/* alternate base for extDict */
+		const unsigned char *dictBase;
+		/* below that point, need extDict */
+		unsigned int	 dictLimit;
+		/* below that point, no more dict */
+		unsigned int	 lowLimit;
+		/* index from which to continue dict update */
+		unsigned int	 nextToUpdate;
+		unsigned int	 compressionLevel;
+} LZ4HC_CCtx_internal;
+typedef union {
+		size_t table[LZ4_STREAMHCSIZE_SIZET];
+		LZ4HC_CCtx_internal internal_donotuse;
+} LZ4_streamHC_t;
+
+/*
+ * LZ4_streamDecode_t :
+ * information structure to track an LZ4 stream during decompression.
+ * init this structure using LZ4_setStreamDecode
+ * (or memset()) before first use
  */
-#define LZ4_MEM_COMPRESS	(16384)
-#define LZ4HC_MEM_COMPRESS	(262144 + (2 * sizeof(unsigned char *)))
+typedef struct {
+		const uint8_t *externalDict;
+		size_t extDictSize;
+		const uint8_t *prefixEnd;
+		size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+typedef union {
+		unsigned long long table[LZ4_STREAMDECODESIZE_U64];
+		LZ4_streamDecode_t_internal internal_donotuse;
+} LZ4_streamDecode_t;
+
+/*-************************************************************************
+ *	SIZE OF STATE
+ **************************************************************************/
+#define LZ4_MEM_COMPRESS	LZ4_STREAMSIZE
+#define LZ4HC_MEM_COMPRESS	LZ4_STREAMHCSIZE
+
+/*-************************************************************************
+ *	Compression Functions
+ **************************************************************************/
 
 /*
- * lz4_compressbound()
- * Provides the maximum size that LZ4 may output in a "worst case" scenario
- * (input data not compressible)
+ * LZ4_compressBound() :
+ *	Provides the maximum size that LZ4 may output in a "worst case" scenario
+ *	(input data not compressible)
  */
-static inline size_t lz4_compressbound(size_t isize)
+static inline int LZ4_compressBound(size_t isize)
 {
-	return isize + (isize / 255) + 16;
+	return (int)LZ4_COMPRESSBOUND(isize);
 }
 
 /*
+ * LZ4_compress_default()
+ *	Compresses 'sourceSize' bytes from buffer 'source'
+ *	into already allocated 'dest' buffer of size 'maxOutputSize'.
+ *	Compression is guaranteed to succeed if
+ *	'maxOutputSize' >= LZ4_compressBound(inputSize).
+ *	It also runs faster, so it's a recommended setting.
+ *	If the function cannot compress 'source'
+ *	into a more limited 'dest' budget,
+ *	compression stops *immediately*,
+ *	and the function result is zero.
+ *	As a consequence, 'dest' content is not valid.
+ *
+ *	source       : source address of the original data
+ *	dest         : output buffer address
+ *                 of the compressed data
+ *	inputSize    : Max supported value is
+ *                 LZ4_MAX_INPUT_SIZE
+ *	maxOutputSize: full or partial size of buffer 'dest'
+ *                 (which must be already allocated)
+ *	workmem      : address of the working memory.
+ *                 This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return       : the number of bytes written into buffer 'dest'
+ *   (necessarily <= maxOutputSize) or 0 if compression fails
+ */
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem);
+
+/*
+ * LZ4_compress_fast() :
+ *	Same as LZ4_compress_default(),
+ *	but allows to select an "acceleration" factor.
+ *	The larger the acceleration value,
+ *	the faster the algorithm,
+ *	but also the lesser the compression.
+ *	It's a trade-off. It can be fine tuned,
+ *	with each successive value
+ *	providing roughly +~3% to speed.
+ *	An acceleration value of "1"
+ *	is the same as regular LZ4_compress_default()
+ *	Values <= 0 will be replaced by
+ *	LZ4_ACCELERATION_DEFAULT, which is 1.
+ */
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem, int acceleration);
+
+/*
+ * LZ4_compress_destSize()
+ * Reverse the logic, by compressing as much data as possible
+ * from 'source' buffer into already allocated buffer 'dest'
+ * of size 'targetDestSize'.
+ * This function either compresses the entire 'source' content into 'dest'
+ * if it's large enough, or fill 'dest' buffer completely with as much data as
+ * ossible from 'source'.
+ *
+ *	source	      : source address of the original data
+ *	dest	        : output buffer address of the compressed data
+ *	inputSize	    : Max supported value is LZ4_MAX_INPUT_SIZE
+ *	*sourceSizePtr: will be modified to indicate how many bytes where read
+ *                  from 'source' to fill 'dest'.
+ *                  New value is necessarily <= old value.
+ *	workmem       : address of the working memory.
+ *                  This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+			 or 0 if compression fails
+ */
+int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
+	int targetDestSize, void *wrkmem);
+
+/*
  * lz4_compress()
- *	src     : source address of the original data
- *	src_len : size of the original data
- *	dst	: output buffer address of the compressed data
- *		This requires 'dst' of size LZ4_COMPRESSBOUND.
- *	dst_len : is the output size, which is returned after compress done
- *	workmem : address of the working memory.
- *		This requires 'workmem' of size LZ4_MEM_COMPRESS.
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer and workmem must be already allocated with
+ *	src    : source address of the original data
+ *	src_len: size of the original data
+ *	dst    : output buffer address of the compressed data
+ *           This requires 'dst' of size LZ4_COMPRESSBOUND.
+ *	dst_len: is the output size, which is returned after compress done
+ *	workmem: address of the working memory.
+ *           This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ *	return	: Success if return 0
+ *            Error if return (< 0)
+ *	note :	Destination buffer and workmem must be already allocated with
  *		the defined size.
  */
-int lz4_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
- /*
-  * lz4hc_compress()
-  *	 src	 : source address of the original data
-  *	 src_len : size of the original data
-  *	 dst	 : output buffer address of the compressed data
-  *		This requires 'dst' of size LZ4_COMPRESSBOUND.
-  *	 dst_len : is the output size, which is returned after compress done
-  *	 workmem : address of the working memory.
-  *		This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
-  *	 return  : Success if return 0
-  *		   Error if return (< 0)
-  *	 note :  Destination buffer and workmem must be already allocated with
-  *		 the defined size.
-  */
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-		unsigned char *dst, size_t *dst_len, void *wrkmem);
-
-/*
- * lz4_decompress()
- *	src     : source address of the compressed data
- *	src_len : is the input size, whcih is returned after decompress done
- *	dest	: output buffer address of the decompressed data
- *	actual_dest_len: is the size of uncompressed data, supposing it's known
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
- *		slightly faster than lz4_decompress_unknownoutputsize()
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*-************************************************************************
+ *	Decompression Functions
+ **************************************************************************/
+
+/*
+ * LZ4_decompress_fast()
+ *	source      : source address of the compressed data
+ *	dst         : output buffer address of the decompressed data
+ *	originalSize: is the original and therefore uncompressed size
+ *	return : the number of bytes read from the source buffer
+ *           (in other words, the compressed size)
+ *           If the source stream is detected malformed, the function will
+ *           stop decoding and return a negative result.
+ *           Destination buffer must be already allocated.
+ *           Its size must be a minimum of 'originalSize' bytes.
+ *	note   : This function fully respect memory boundaries
+ *           for properly formed compressed data.
+ *           It is a bit faster than LZ4_decompress_safe().
+ *           However, it does not provide any protection against intentionally
+ *           modified data stream (malicious input).
+ *           Use this function in trusted environment only
+ *           (data to decode comes from a trusted source).
  */
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len);
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
+
+/*
+ * LZ4_decompress_safe()
+ *	source             : source address of the compressed data
+ *	dest               : output buffer address of the decompressed data
+ *	compressedSize     : is the precise full size of the compressed block.
+ *	maxDecompressedSize: is the size of destination buffer,
+ *                       which must be already allocated.
+ *	return : the number of bytes decompressed into destination buffer
+ *           (necessarily <= maxDecompressedSize)
+ *           If destination buffer is not large enough, decoding will stop
+ *           and output an error code (<0).
+ *           If the source stream is detected malformed, the function will
+ *           stop decoding and return a negative result.
+ *           This function is protected against buffer overflow exploits,
+ *           including malicious data packets. It never writes outside
+ *           output buffer, nor reads outside input buffer.
+ */
+int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+
+/*
+ * LZ4_decompress_safe_partial() :
+ * This function decompress a compressed block of size 'compressedSize'
+ * at position 'source' into destination buffer 'dest'
+ * of size 'maxDecompressedSize'.
+ * The function tries to stop decompressing operation as soon as
+ * 'targetOutputSize' has been reached, reducing decompression time.
+ *
+ *	source             : source address of the compressed data
+ *	dest               : output buffer address of the decompressed data
+ *	compressedSize     : is the precise full size of the compressed block.
+ *	targetOutputSize   : the decompression operation will try
+ *                       to stop as soon as 'targetOutputSize'
+ *                       has been reached
+ *	maxDecompressedSize: is the size of destination buffer,
+ *                       which must be already allocated.
+ *	return  : the number of bytes decoded in the destination buffer
+ *            (necessarily <= maxDecompressedSize)
+ *	Note    : this number can be < 'targetOutputSize' should the compressed
+ *            block to decode be smaller. Always control how many bytes
+ *            were decoded. If the source stream is detected malformed,
+ *            the function will stop decoding and return a negative result.
+ *            This function never writes outside of output buffer,
+ *            and never reads outside of input buffer.
+ *            It is therefore protected against malicious data packets
+ */
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize);
+
 
 /*
- * lz4_decompress_unknownoutputsize()
+ * lz4_decompress_unknownoutputsize() :
  *	src     : source address of the compressed data
  *	src_len : is the input size, therefore the compressed size
- *	dest	: output buffer address of the decompressed data
+ *	dest    : output buffer address of the decompressed data
  *	dest_len: is the max size of the destination buffer, which is
- *			returned with actual size of decompressed data after
- *			decompress done
- *	return  : Success if return 0
- *		  Error if return (< 0)
- *	note :  Destination buffer must be already allocated.
+ *            returned with actual size of decompressed data after
+ *            decompress done
+ * return: Success if return 0
+ *         Error if return (< 0)
+ * note:   Destination buffer must be already allocated.
  */
 int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len);
+	unsigned char *dest, size_t *dest_len);
+
+/*
+ * lz4_decompress() :
+ *	src            : source address of the compressed data
+ *	src_len        : is the input size,
+ *                   which is returned after decompress done
+ *	dest           : output buffer address of the decompressed data
+ *	actual_dest_len: is the size of uncompressed data, supposing it's known
+ *	return: Success if return 0
+ *          Error if return (< 0)
+ *	note  : Destination buffer must be already allocated.
+ *          slightly faster than lz4_decompress_unknownoutputsize()
+ */
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len);
+
+/*-************************************************************************
+ *	LZ4 HC Compression
+ **************************************************************************/
+
+/*! LZ4_compress_HC() :
+ * Compress data from `src` into `dst`, using the more powerful
+ * but slower "HC" algorithm. dst` must be already allocated.
+ * Compression is guaranteed to succeed if
+ * `dstCapacity >= LZ4_compressBound(srcSize)
+ * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE
+ *
+ *	src             : source address of the original data
+ *	dst             : output buffer address of the compressed data
+ *	srcSize         : Max supported value is LZ4_MAX_INPUT_SIZE
+ *	dstCapacity     : full or partial size of buffer 'dst'
+ *                    (which must be already allocated)
+ *	compressionLevel: Recommended values are between 4 and 9, although any
+ *                    value between 1 and LZ4HC_MAX_CLEVEL will work.
+ *                    Values >LZ4HC_MAX_CLEVEL behave the same as 16.
+ *	wrkmem          : address of the working memory.
+ *                    This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
+ *	return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ */
+int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
+	int compressionLevel, void *wrkmem);
+
+/*
+ * lz4hc_compress()
+ *	src    : source address of the original data
+ *	src_len: size of the original data
+ *	dst    : output buffer address of the compressed data
+ *           This requires 'dst' of size LZ4_COMPRESSBOUND.
+ *	dst_len: is the output size, which is returned after compress done
+ *	workmem: address of the working memory.
+ *           This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+ * return	: Success if return 0
+ *          Error if return (< 0)
+ * note   : Destination buffer and workmem must be already allocated with
+ *          the defined size.
+ */
+int lz4hc_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem);
+
+/*
+ *	These functions compress data in successive blocks of any size,
+ *	using previous blocks as dictionary. One key assumption is that previous
+ *	blocks (up to 64 KB) remain read-accessible while
+ *	compressing next blocks.
+ *	There is an exception for ring buffers, which can be smaller than 64 KB.
+ *	Ring buffers scenario is automatically detected and handled by
+ *	LZ4_compress_HC_continue().
+ *	Before starting compression, state must be properly initialized,
+ *	using LZ4_resetStreamHC().
+ *	A first "fictional block" can then be designated as initial dictionary,
+ *	using LZ4_loadDictHC() (Optional).
+ *	Then, use LZ4_compress_HC_continue() to compress each successive block.
+ *	Previous memory blocks (including initial dictionary when present) must
+ *	remain accessible and unmodified during compression.
+ *	'dst' buffer should be sized to handle worst case scenarios, using
+ *	LZ4_compressBound(), to ensure operation success.
+ *	If, for any reason, previous data blocks can't be preserved unmodified
+ *	in memory during next compression block,
+ *	you must save it to a safer memory space, using LZ4_saveDictHC().
+ *	Return value of LZ4_saveDictHC() is the size of dictionary
+ *	effectively saved into 'safeBuffer'.
+ */
+void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
+int	LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
+	int dictSize);
+int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize);
+int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
+	int maxDictSize);
+
+/*-*********************************************
+ *	Streaming Compression Functions
+ ***********************************************/
+/*
+ * LZ4_resetStream() :
+ *	An LZ4_stream_t structure can be allocated once
+ *	and re-used multiple times.
+ *	Use this function to init an allocated `LZ4_stream_t` structure
+ *	and start a new compression.
+ */
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
+
+/*
+ * LZ4_loadDict() :
+ *	Use this function to load a static dictionary into LZ4_stream.
+ *	Any previous data will be forgotten, only 'dictionary'
+ *	will remain in memory.
+ *	Loading a size of 0 is allowed.
+ *	Return : dictionary size, in bytes (necessarily <= 64 KB)
+ */
+int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
+	int dictSize);
+
+/*
+ * LZ4_compress_fast_continue() :
+ *	Compress buffer content 'src',
+ *	using data from previously compressed blocks
+ *	as dictionary to improve compression ratio.
+ *	Important : Previous data blocks are assumed to still
+ *	be present and unmodified !
+ *	'dst' buffer must be already allocated.
+ *	If maxDstSize >= LZ4_compressBound(srcSize),
+ *	compression is guaranteed to succeed, and runs faster.
+ *	If not, and if compressed data cannot fit into 'dst' buffer size,
+ *	compression stops, and function returns a zero.
+ */
+int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
+	char *dst, int srcSize, int maxDstSize, int acceleration);
+
+/*
+ * LZ4_saveDict() :
+ *	If previously compressed data block is not guaranteed
+ *	to remain available at its memory location,
+ *	save it into a safer place (char *safeBuffer).
+ *	Note : you don't need to call LZ4_loadDict() afterwards,
+ *         dictionary is immediately usable, you can therefore call
+ *         LZ4_compress_fast_continue().
+ *	Return : saved dictionary size in bytes (necessarily <= dictSize),
+ *           or 0 if error.
+ */
+int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
+
+/*
+ * LZ4_setStreamDecode() :
+ *	Use this function to instruct where to find the dictionary.
+ *	Setting a size of 0 is allowed (same effect as reset).
+ *	@return : 1 if OK, 0 if error
+ */
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize);
+
+/*
+ * LZ4_decompress_*_continue() :
+ *	These decoding functions allow decompression of multiple blocks
+ *	in "streaming" mode.
+ *	Previously decoded blocks *must* remain available at the memory position
+ *	where they were decoded (up to 64 KB)
+ *	In the case of a ring buffers, decoding buffer must be either :
+ *    - Exactly same size as encoding buffer, with same update rule
+ *      (block boundaries at same positions) In which case,
+ *      the decoding & encoding ring buffer can have any size,
+ *      including very small ones ( < 64 KB).
+ *    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *      maxBlockSize is implementation dependent.
+ *      It's the maximum size you intend to compress into a single block.
+ *      In which case, encoding and decoding buffers do not need
+ *      to be synchronized, and encoding ring buffer can have any size,
+ *      including small ones ( < 64 KB).
+ *    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+ *      In which case, encoding and decoding buffers do not need to be
+ *      synchronized, and encoding ring buffer can have any size,
+ *      including larger than decoding buffer. W
+ * Whenever these conditions are not possible, save the last 64KB of decoded
+ * data into a safe buffer, and indicate where it is saved
+ * using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize,
+	int maxDecompressedSize);
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize);
+
+/*
+ * LZ4_decompress_*_usingDict() :
+ *	These decoding functions work the same as
+ *	a combination of LZ4_setStreamDecode() followed by
+ *	LZ4_decompress_*_continue()
+ *	They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ */
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize, const char *dictStart,
+	int dictSize);
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize);
+
 #endif
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index 28321d8..4cd6d1b 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -1,19 +1,16 @@
 /*
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *		* Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *		* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,417 +22,873 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/lz4.h>
 #include <asm/unaligned.h>
-#include "lz4defs.h"
 
-/*
- * LZ4_compressCtx :
- * -----------------
- * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
- * maximum size 'maxOutputSize'.  * If it cannot achieve it, compression
- * will stop, and result of the function will be zero.
- * return : the number of bytes written in buffer 'dest', or 0 if the
- * compression fails
- */
-static inline int lz4_compressctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+/*-******************************
+ *	Compression functions
+ ********************************/
+static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
 {
-	HTYPE *hashtable = (HTYPE *)ctx;
-	const u8 *ip = (u8 *)source;
-#if LZ4_ARCH64
-	const BYTE * const base = ip;
+	if (tableType == byU16)
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - (LZ4_HASHLOG + 1)));
+	else
+		return ((sequence * 2654435761U)
+			>> ((MINMATCH*8) - LZ4_HASHLOG));
+}
+
+static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
+{
+	const U32 hashLog = (tableType == byU16)
+		? LZ4_HASHLOG + 1
+		: LZ4_HASHLOG;
+
+#ifdef __LITTLE_ENDIAN__
+	static const U64 prime5bytes = 889523592379ULL;
+
+	return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
 #else
-	const int base = 0;
+	static const U64 prime8bytes = 11400714785074694791ULL;
+
+	return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
 #endif
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+}
+
+static U32 LZ4_hashPosition(const void *p, tableType_t tableType)
+{
+#if LZ4_ARCH64
+	if (tableType == byU32)
+		return LZ4_hash5(LZ4_read_ARCH(p), tableType);
+#endif
+
+	return LZ4_hash4(LZ4_read32(p), tableType);
+}
+
+static void LZ4_putPositionOnHash(const BYTE *p, U32 h, void *tableBase,
+	tableType_t const tableType, const BYTE *srcBase)
+{
+	switch (tableType) {
+	case byPtr:
+	{
+		const BYTE **hashTable = (const BYTE **)tableBase;
+
+		hashTable[h] = p;
+		return;
+	}
+	case byU32:
+	{
+		U32 *hashTable = (U32 *) tableBase;
+
+		hashTable[h] = (U32)(p - srcBase);
+		return;
+	}
+	case byU16:
+	{
+		U16 *hashTable = (U16 *) tableBase;
+
+		hashTable[h] = (U16)(p - srcBase);
+		return;
+	}
+	}
+}
+
+static inline void LZ4_putPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
+}
+
+static const BYTE *LZ4_getPositionOnHash(U32 h, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	if (tableType == byPtr) {
+		const BYTE **hashTable = (const BYTE **) tableBase;
+
+		return hashTable[h];
+	}
+
+	if (tableType == byU32) {
+		const U32 * const hashTable = (U32 *) tableBase;
+
+		return hashTable[h] + srcBase;
+	}
+
+	{
+		/* default, to ensure a return */
+		const U16 * const hashTable = (U16 *) tableBase;
+		return hashTable[h] + srcBase;
+	}
+}
+
+static inline const BYTE *LZ4_getPosition(const BYTE *p, void *tableBase,
+	tableType_t tableType, const BYTE *srcBase)
+{
+	U32 const h = LZ4_hashPosition(p, tableType);
+
+	return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
+}
+
+
+/*
+ * LZ4_compress_generic() :
+ * inlined, to ensure branches are decided at compilation time
+ */
+static inline int LZ4_compress_generic(
+	LZ4_stream_t_internal * const dictPtr,
+	const char * const source,
+	char * const dest,
+	const int inputSize,
+	const int maxOutputSize,
+	const limitedOutput_directive outputLimited,
+	const tableType_t tableType,
+	const dict_directive dict,
+	const dictIssue_directive dictIssue,
+	const U32 acceleration)
+{
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *base;
+	const BYTE *lowLimit;
+	const BYTE * const lowRefLimit = ip - dictPtr->dictSize;
+	const BYTE * const dictionary = dictPtr->dictionary;
+	const BYTE * const dictEnd = dictionary + dictPtr->dictSize;
+	const size_t dictDelta = dictEnd - (const BYTE *)source;
+	const BYTE *anchor = (const BYTE *) source;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dest;
+	BYTE * const olimit = op + maxOutputSize;
+
+	U32 forwardH;
+	size_t refDelta = 0;
+
+	/* Init conditions */
+	if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) {
+		/* Unsupported inputSize, too large (or negative) */
+		return 0;
+	}
+	switch (dict) {
+	case noDict:
+	default:
+		base = (const BYTE *)source;
+		lowLimit = (const BYTE *)source;
+		break;
+	case withPrefix64k:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source - dictPtr->dictSize;
+		break;
+	case usingExtDict:
+		base = (const BYTE *)source - dictPtr->currentOffset;
+		lowLimit = (const BYTE *)source;
+		break;
+	}
+
+	if ((tableType == byU16)
+		&& (inputSize >= LZ4_64Klimit)) {
+		/* Size too large (not within 64K limit) */
+		return 0;
+	}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+	if (inputSize < LZ4_minLength) {
+		/* Input too small, no compression (all literals) */
+		goto _last_literals;
+	}
 
 	/* First Byte */
-	hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-	ip++;
-	forwardh = LZ4_HASH_VALUE(ip);
+	LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (unlikely(forwardip > mflimit))
-				goto _last_literals;
-
-			forwardh = LZ4_HASH_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = ip - base;
-		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+		{ const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = acceleration
+				<< LZ4_skipTrigger;
+
+			do {
+				U32 const h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h,
+					dictPtr->hashTable,
+					tableType, base);
+				if (dict == usingExtDict) {
+					if (match < (const BYTE *)source) {
+						refDelta = dictDelta;
+						lowLimit = dictionary;
+					} else {
+						refDelta = 0;
+						lowLimit = (const BYTE *)source;
+				}	 }
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
+					tableType, base);
+
+			} while (((dictIssue == dictSmall)
+					? (match < lowRefLimit)
+					: 0)
+				|| ((tableType == byU16)
+					? 0
+					: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match + refDelta)
+					!= LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source) &&
-			unlikely(ip[-1] == ref[-1])) {
+		while (((ip > anchor) & (match + refDelta > lowLimit))
+			&& (unlikely(ip[-1] == match[refDelta - 1]))) {
 			ip--;
-			ref--;
-		}
+			match--;
+			}
 
-		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
-			(length >> 8) > oend))
-			return 0;
+		/* Encode Literals */
+		{ unsigned const int litLength = (unsigned int)(ip - anchor);
 
-		if (length >= (int)RUN_MASK) {
-			int len;
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+			token = op++;
+			if ((outputLimited) &&
+				/* Check output buffer overflow */
+				(unlikely(op + litLength +
+					(2 + 1 + LASTLITERALS) +
+					(litLength/255) > olimit)))
+				return 0;
+			if (litLength >= RUN_MASK) {
+				int len = (int)litLength - RUN_MASK;
+
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
-		while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
-			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
-		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 		/* Encode MatchLength */
-		length = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
-			return 0;
-		if (length >= (int)ML_MASK) {
-			*token += ML_MASK;
-			length -= ML_MASK;
-			for (; length > 509 ; length -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (length > 254) {
-				length -= 255;
-				*op++ = 255;
+		{	 unsigned int matchCode;
+
+			if ((dict == usingExtDict)
+				&& (lowLimit == dictionary)) {
+				const BYTE *limit;
+
+				match += refDelta;
+				limit = ip + (dictEnd - match);
+				if (limit > matchlimit)
+					limit = matchlimit;
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, limit);
+				ip += MINMATCH + matchCode;
+				if (ip == limit) {
+					unsigned const int more = LZ4_count(ip,
+						(const BYTE *)source,
+						matchlimit);
+
+					matchCode += more;
+					ip += more;
+				}
+			} else {
+				matchCode = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, matchlimit);
+				ip += MINMATCH + matchCode;
 			}
-			*op++ = (u8)length;
-		} else
-			*token += length;
+
+			if (outputLimited &&
+				/* Check output buffer overflow */
+				(unlikely(op +
+					(1 + LASTLITERALS) +
+					(matchCode>>8) > olimit)))
+				return 0;
+			if (matchCode >= ML_MASK) {
+				*token += ML_MASK;
+				matchCode -= ML_MASK;
+				LZ4_write32(op, 0xFFFFFFFF);
+				while (matchCode >= 4*255) {
+					op += 4;
+					LZ4_write32(op, 0xFFFFFFFF);
+					matchCode -= 4*255;
+				}
+				op += matchCode / 255;
+				*op++ = (BYTE)(matchCode % 255);
+			} else
+				*token += (BYTE)(matchCode);
+		}
+
+		anchor = ip;
 
 		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		if (ip > mflimit)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
+		LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH_VALUE(ip)];
-		hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
-		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
+		match = LZ4_getPosition(ip, dictPtr->hashTable,
+			tableType, base);
+		if (dict == usingExtDict) {
+			if (match < (const BYTE *)source) {
+				refDelta = dictDelta;
+				lowLimit = dictionary;
+			} else {
+				refDelta = 0;
+				lowLimit = (const BYTE *)source;
+			}
+		}
+		LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
+		if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
+			&& (match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
 			token = op++;
 			*token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (((char *)op - dest) + lastrun + 1
-		+ ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
-		return 0;
-
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{	 size_t const lastRun = (size_t)(iend - anchor);
+		if ((outputLimited) &&
+			/* Check output buffer overflow */
+			((op - (BYTE *)dest) + lastRun + 1 +
+			((lastRun + 255 - RUN_MASK)/255) > (U32)maxOutputSize))
+			return 0;
+		if (lastRun >= RUN_MASK) {
+			size_t accumulator = lastRun - RUN_MASK;
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRun);
+		op += lastRun;
+	}
 
 	/* End */
-	return (int)(((char *)op) - dest);
+	return (int) (((char *)op) - dest);
 }
 
-static inline int lz4_compress64kctx(void *ctx,
-		const char *source,
-		char *dest,
-		int isize,
-		int maxoutputsize)
+int LZ4_compress_fast_extState(void *state, const char *source, char *dest,
+	int inputSize, int maxOutputSize, int acceleration)
 {
-	u16 *hashtable = (u16 *)ctx;
-	const u8 *ip = (u8 *) source;
-	const u8 *anchor = ip;
-	const u8 *const base = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	#define MATCHLIMIT (iend - LASTLITERALS)
-
-	u8 *op = (u8 *) dest;
-	u8 *const oend = op + maxoutputsize;
-	int len, length;
-	const int skipstrength = SKIPSTRENGTH;
-	u32 forwardh;
-	int lastrun;
-
-	/* Init */
-	if (isize < MINLENGTH)
-		goto _last_literals;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
+
+	LZ4_resetStream((LZ4_stream_t *)state);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize, 0,
+				noLimit, tableType, noDict,
+				noDictIssue, acceleration);
+	} else {
+		if (inputSize < LZ4_64Klimit)
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, byU16, noDict,
+				noDictIssue, acceleration);
+		else
+			return LZ4_compress_generic(ctx, source,
+				dest, inputSize,
+				maxOutputSize, limitedOutput, tableType, noDict,
+				noDictIssue, acceleration);
+	}
+}
 
-	memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+int LZ4_compress_fast(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem, int acceleration)
+{
+	return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize,
+		maxOutputSize, acceleration);
+}
+EXPORT_SYMBOL(LZ4_compress_fast);
+
+int LZ4_compress_default(const char *source, char *dest, int inputSize,
+	int maxOutputSize, void *wrkmem)
+{
+	return LZ4_compress_fast(source, dest, inputSize,
+		maxOutputSize, wrkmem, 1);
+}
+EXPORT_SYMBOL(LZ4_compress_default);
+
+/*-******************************
+ *	*_destSize() variant
+ ********************************/
+
+static int LZ4_compress_destSize_generic(
+	LZ4_stream_t_internal * const ctx,
+	const char * const src,
+	char * const dst,
+	int * const srcSizePtr,
+	const int targetDstSize,
+	const tableType_t tableType)
+{
+	const BYTE *ip = (const BYTE *) src;
+	const BYTE *base = (const BYTE *) src;
+	const BYTE *lowLimit = (const BYTE *) src;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + *srcSizePtr;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = iend - LASTLITERALS;
+
+	BYTE *op = (BYTE *) dst;
+	BYTE * const oend = op + targetDstSize;
+	BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */
+		- 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */;
+	BYTE * const oMaxMatch = op + targetDstSize
+		- (LASTLITERALS + 1 /* token */);
+	BYTE * const oMaxSeq = oMaxLit - 1 /* token */;
+
+	U32 forwardH;
+
+	/* Init conditions */
+	/* Impossible to store anything */
+	if (targetDstSize < 1)
+		return 0;
+	/* Unsupported input size, too large (or negative) */
+	if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE)
+		return 0;
+	/* Size too large (not within 64K limit) */
+	if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit))
+		return 0;
+	/* Input too small, no compression (all literals) */
+	if (*srcSizePtr < LZ4_minLength)
+		goto _last_literals;
 
 	/* First Byte */
-	ip++;
-	forwardh = LZ4_HASH64K_VALUE(ip);
+	*srcSizePtr = 0;
+	LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+	ip++; forwardH = LZ4_hashPosition(ip, tableType);
 
 	/* Main Loop */
-	for (;;) {
-		int findmatchattempts = (1U << skipstrength) + 3;
-		const u8 *forwardip = ip;
-		const u8 *ref;
-		u8 *token;
+	for ( ; ; ) {
+		const BYTE *match;
+		BYTE *token;
 
 		/* Find a match */
-		do {
-			u32 h = forwardh;
-			int step = findmatchattempts++ >> skipstrength;
-			ip = forwardip;
-			forwardip = ip + step;
-
-			if (forwardip > mflimit)
-				goto _last_literals;
-
-			forwardh = LZ4_HASH64K_VALUE(forwardip);
-			ref = base + hashtable[h];
-			hashtable[h] = (u16)(ip - base);
-		} while (A32(ref) != A32(ip));
+		{	 const BYTE *forwardIp = ip;
+			unsigned int step = 1;
+			unsigned int searchMatchNb = 1 << LZ4_skipTrigger;
+
+			do {
+				U32 h = forwardH;
+
+				ip = forwardIp;
+				forwardIp += step;
+				step = (searchMatchNb++ >> LZ4_skipTrigger);
+
+				if (unlikely(forwardIp > mflimit))
+					goto _last_literals;
+
+				match = LZ4_getPositionOnHash(h, ctx->hashTable,
+					tableType, base);
+				forwardH = LZ4_hashPosition(forwardIp,
+					tableType);
+				LZ4_putPositionOnHash(ip, h,
+					ctx->hashTable, tableType,
+					base);
+
+			} while (((tableType == byU16)
+				? 0
+				: (match + MAX_DISTANCE < ip))
+				|| (LZ4_read32(match) != LZ4_read32(ip)));
+		}
 
 		/* Catch up */
-		while ((ip > anchor) && (ref > (u8 *)source)
-			&& (ip[-1] == ref[-1])) {
-			ip--;
-			ref--;
-		}
+		while ((ip > anchor)
+			&& (match > lowLimit)
+			&& (unlikely(ip[-1] == match[-1]))) {
+			ip--; match--;
+			}
 
 		/* Encode Literal length */
-		length = (int)(ip - anchor);
-		token = op++;
-		/* Check output limit */
-		if (unlikely(op + length + (2 + 1 + LASTLITERALS)
-			+ (length >> 8) > oend))
-			return 0;
-		if (length >= (int)RUN_MASK) {
-			*token = (RUN_MASK << ML_BITS);
-			len = length - RUN_MASK;
-			for (; len > 254 ; len -= 255)
-				*op++ = 255;
-			*op++ = (u8)len;
-		} else
-			*token = (length << ML_BITS);
+		{	 unsigned int litLength = (unsigned int)(ip - anchor);
 
-		/* Copy Literals */
-		LZ4_BLINDCOPY(anchor, op, length);
+			token = op++;
+			if (op + ((litLength + 240)/255)
+				+ litLength > oMaxLit) {
+				/* Not enough space for a last match */
+				op--;
+				goto _last_literals;
+			}
+			if (litLength >= RUN_MASK) {
+				unsigned int len = litLength - RUN_MASK;
+				*token = (RUN_MASK<<ML_BITS);
+				for (; len >= 255 ; len -= 255)
+					*op++ = 255;
+				*op++ = (BYTE)len;
+			} else
+				*token = (BYTE)(litLength<<ML_BITS);
+
+			/* Copy Literals */
+			LZ4_wildCopy(op, anchor, op + litLength);
+			op += litLength;
+		}
 
 _next_match:
 		/* Encode Offset */
-		LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+		LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
 
-		/* Start Counting */
-		ip += MINMATCH;
-		/* MinMatch verified */
-		ref += MINMATCH;
-		anchor = ip;
+		/* Encode MatchLength */
+		{	 size_t matchLength = LZ4_count(ip + MINMATCH,
+			match + MINMATCH, matchlimit);
 
-		while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
-			#if LZ4_ARCH64
-			u64 diff = A64(ref) ^ A64(ip);
-			#else
-			u32 diff = A32(ref) ^ A32(ip);
-			#endif
-
-			if (!diff) {
-				ip += STEPSIZE;
-				ref += STEPSIZE;
-				continue;
+			if (op + ((matchLength + 240)/255) > oMaxMatch) {
+				/* Match description too long : reduce it */
+				matchLength = (15 - 1) + (oMaxMatch - op) * 255;
 			}
-			ip += LZ4_NBCOMMONBYTES(diff);
-			goto _endcount;
-		}
-		#if LZ4_ARCH64
-		if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
-			ip += 4;
-			ref += 4;
+			ip += MINMATCH + matchLength;
+
+			if (matchLength >= ML_MASK) {
+				*token += ML_MASK;
+				matchLength -= ML_MASK;
+				while (matchLength >= 255) {
+					matchLength -= 255; *op++ = 255;
+				}
+				*op++ = (BYTE)matchLength;
+			} else
+				*token += (BYTE)(matchLength);
 		}
-		#endif
-		if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
-			ip += 2;
-			ref += 2;
-		}
-		if ((ip < MATCHLIMIT) && (*ref == *ip))
-			ip++;
-_endcount:
 
-		/* Encode MatchLength */
-		len = (int)(ip - anchor);
-		/* Check output limit */
-		if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
-			return 0;
-		if (len >= (int)ML_MASK) {
-			*token += ML_MASK;
-			len -= ML_MASK;
-			for (; len > 509 ; len -= 510) {
-				*op++ = 255;
-				*op++ = 255;
-			}
-			if (len > 254) {
-				len -= 255;
-				*op++ = 255;
-			}
-			*op++ = (u8)len;
-		} else
-			*token += len;
+		anchor = ip;
 
-		/* Test end of chunk */
-		if (ip > mflimit) {
-			anchor = ip;
+		/* Test end of block */
+		if (ip > mflimit)
+			break;
+		if (op > oMaxSeq)
 			break;
-		}
 
 		/* Fill table */
-		hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
+		LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base);
 
 		/* Test next position */
-		ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
-		hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
-		if (A32(ref) == A32(ip)) {
-			token = op++;
-			*token = 0;
+		match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
+		LZ4_putPosition(ip, ctx->hashTable, tableType, base);
+
+		if ((match + MAX_DISTANCE >= ip)
+			&& (LZ4_read32(match) == LZ4_read32(ip))) {
+			token = op++; *token = 0;
 			goto _next_match;
 		}
 
 		/* Prepare next loop */
-		anchor = ip++;
-		forwardh = LZ4_HASH64K_VALUE(ip);
+		forwardH = LZ4_hashPosition(++ip, tableType);
 	}
 
 _last_literals:
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
-		return 0;
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8)lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{	 size_t lastRunSize = (size_t)(iend - anchor);
+
+		if (op + 1 /* token */
+			+ ((lastRunSize + 240)/255) /* litLength */
+			+ lastRunSize /* literals */ > oend) {
+			/* adapt lastRunSize to fill 'dst' */
+			lastRunSize	= (oend - op) - 1;
+			lastRunSize -= (lastRunSize + 240)/255;
+		}
+		ip = anchor + lastRunSize;
+
+		if (lastRunSize >= RUN_MASK) {
+			size_t accumulator = lastRunSize - RUN_MASK;
+
+			*op++ = RUN_MASK << ML_BITS;
+			for (; accumulator >= 255 ; accumulator -= 255)
+				*op++ = 255;
+			*op++ = (BYTE) accumulator;
+		} else {
+			*op++ = (BYTE)(lastRunSize<<ML_BITS);
+		}
+		memcpy(op, anchor, lastRunSize);
+		op += lastRunSize;
+	}
+
 	/* End */
-	return (int)(((char *)op) - dest);
+	*srcSizePtr = (int) (((const char *)ip) - src);
+	return (int) (((char *)op) - dst);
 }
 
-int lz4_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+static int LZ4_compress_destSize_extState(LZ4_stream_t *state, const char *src,
+	char *dst, int *srcSizePtr, int targetDstSize)
 {
-	int ret = -1;
-	int out_len = 0;
+	#if LZ4_ARCH64
+	tableType_t tableType = byU32;
+	#else
+	tableType_t tableType = byPtr;
+	#endif
+
+	LZ4_resetStream(state);
+
+	if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {
+		/* compression success is guaranteed */
+		return LZ4_compress_fast_extState(
+			state, src, dst, *srcSizePtr,
+			targetDstSize, 1);
+	} else {
+		if (*srcSizePtr < LZ4_64Klimit)
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, byU16);
+		else
+			return LZ4_compress_destSize_generic(
+				&state->internal_donotuse,
+				src, dst, srcSizePtr,
+				targetDstSize, tableType);
+	}
+}
 
-	if (src_len < LZ4_64KLIMIT)
-		out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
-	else
-		out_len = lz4_compressctx(wrkmem, src, dst, src_len,
-				lz4_compressbound(src_len));
 
-	if (out_len < 0)
-		goto exit;
+int LZ4_compress_destSize(const char *src, char *dst, int *srcSizePtr,
+	int targetDstSize, void *wrkmem)
+{
+	return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr,
+		targetDstSize);
+}
+EXPORT_SYMBOL(LZ4_compress_destSize);
+
+/*-******************************
+ *	Streaming functions
+ ********************************/
+void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
+{
+	memset(LZ4_stream, 0, sizeof(LZ4_stream_t));
+}
+
+#define HASH_UNIT sizeof(reg_t)
+int LZ4_loadDict(LZ4_stream_t *LZ4_dict,
+	const char *dictionary, int dictSize)
+{
+	LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse;
+	const BYTE *p = (const BYTE *)dictionary;
+	const BYTE * const dictEnd = p + dictSize;
+	const BYTE *base;
+
+	if ((dict->initCheck)
+		|| (dict->currentOffset > 1 * GB)) {
+		/* Uninitialized structure, or reuse overflow */
+		LZ4_resetStream(LZ4_dict);
+	}
+
+	if (dictSize < (int)HASH_UNIT) {
+		dict->dictionary = NULL;
+		dict->dictSize = 0;
+		return 0;
+	}
+
+	if ((dictEnd - p) > 64 * KB)
+		p = dictEnd - 64 * KB;
+	dict->currentOffset += 64 * KB;
+	base = p - dict->currentOffset;
+	dict->dictionary = p;
+	dict->dictSize = (U32)(dictEnd - p);
+	dict->currentOffset += dict->dictSize;
+
+	while (p <= dictEnd - HASH_UNIT) {
+		LZ4_putPosition(p, dict->hashTable, byU32, base);
+		p += 3;
+	}
 
-	*dst_len = out_len;
+	return dict->dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDict);
+
+static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict,
+	const BYTE *src)
+{
+	if ((LZ4_dict->currentOffset > 0x80000000) ||
+		((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {
+		/* address space overflow */
+		/* rescale hash table */
+		U32 const delta = LZ4_dict->currentOffset - 64 * KB;
+		const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+		int i;
+
+		for (i = 0; i < LZ4_HASH_SIZE_U32; i++) {
+			if (LZ4_dict->hashTable[i] < delta)
+				LZ4_dict->hashTable[i] = 0;
+			else
+				LZ4_dict->hashTable[i] -= delta;
+		}
+		LZ4_dict->currentOffset = 64 * KB;
+		if (LZ4_dict->dictSize > 64 * KB)
+			LZ4_dict->dictSize = 64 * KB;
+		LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+	}
+}
+
+int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
+{
+	LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse;
+	const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize;
+
+	if ((U32)dictSize > 64 * KB) {
+		/* useless to define a dictionary > 64 * KB */
+		dictSize = 64 * KB;
+	}
+	if ((U32)dictSize > dict->dictSize)
+		dictSize = dict->dictSize;
 
-	return 0;
-exit:
-	return ret;
+	memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
+
+	dict->dictionary = (const BYTE *)safeBuffer;
+	dict->dictSize = (U32)dictSize;
+
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDict);
+
+int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
+	char *dest, int inputSize, int maxOutputSize, int acceleration)
+{
+	LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
+	const BYTE * const dictEnd = streamPtr->dictionary
+		+ streamPtr->dictSize;
+
+	const BYTE *smallest = (const BYTE *) source;
+
+	if (streamPtr->initCheck) {
+		/* Uninitialized structure detected */
+		return 0;
+	}
+
+	if ((streamPtr->dictSize > 0) && (smallest > dictEnd))
+		smallest = dictEnd;
+
+	LZ4_renormDictT(streamPtr, smallest);
+
+	if (acceleration < 1)
+		acceleration = LZ4_ACCELERATION_DEFAULT;
+
+	/* Check overlapping input/dictionary space */
+	{	 const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+
+		if ((sourceEnd > streamPtr->dictionary)
+			&& (sourceEnd < dictEnd)) {
+			streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
+			if (streamPtr->dictSize > 64 * KB)
+				streamPtr->dictSize = 64 * KB;
+			if (streamPtr->dictSize < 4)
+				streamPtr->dictSize = 0;
+			streamPtr->dictionary = dictEnd - streamPtr->dictSize;
+		}
+	}
+
+	/* prefix mode : source data follows dictionary */
+	if (dictEnd == (const BYTE *)source) {
+		int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, dictSmall,	acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				withPrefix64k, noDictIssue, acceleration);
+		}
+		streamPtr->dictSize += (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+
+	/* external dictionary mode */
+	{ int result;
+
+		if ((streamPtr->dictSize < 64 * KB) &&
+			(streamPtr->dictSize < streamPtr->currentOffset)) {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, dictSmall, acceleration);
+		} else {
+			result = LZ4_compress_generic(
+				streamPtr, source, dest, inputSize,
+				maxOutputSize, limitedOutput, byU32,
+				usingExtDict, noDictIssue, acceleration);
+		}
+		streamPtr->dictionary = (const BYTE *)source;
+		streamPtr->dictSize = (U32)inputSize;
+		streamPtr->currentOffset += (U32)inputSize;
+		return result;
+	}
+}
+EXPORT_SYMBOL(LZ4_compress_fast_continue);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_compress(const unsigned char *src, size_t src_len, unsigned char *dst,
+	size_t *dst_len, void *wrkmem) {
+	*dst_len = LZ4_compress_default(src, dst, (int)src_len,
+		(int)((size_t)dst_len), wrkmem);
+
+	/*
+	 * Prior lz4_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_fast/default
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4_compress);
 
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index 6d940c7..590057b 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -1,25 +1,16 @@
 /*
- * LZ4 Decompressor for Linux kernel
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
- *
- * Based on LZ4 implementation by Yann Collet.
- *
  * LZ4 - Fast LZ compression algorithm
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
+ * Copyright (C) 2011 - 2016, Yann Collet.
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *    * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *    * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -31,313 +22,508 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  You can contact the author at :
- *  - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- *  - LZ4 source repository : http://code.google.com/p/lz4/
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#ifndef STATIC
+/*-************************************
+ *	Dependencies
+ **************************************/
+#include <linux/lz4.h>
+#include "lz4defs.h"
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
-#endif
-#include <linux/lz4.h>
-
 #include <asm/unaligned.h>
 
-#include "lz4defs.h"
-
-static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
-static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
-#endif
-
-static int lz4_uncompress(const char *source, char *dest, int osize)
+/*-*****************************
+ *	Decompression functions
+ *******************************/
+/* LZ4_decompress_generic() :
+ * This generic decompression function cover all use cases.
+ * It shall be instantiated several times, using different sets of directives
+ * Note that it is important this generic function is really inlined,
+ * in order to remove useless branches during compilation optimization.
+ */
+static inline int LZ4_decompress_generic(
+	 const char *const source,
+	 char * const dest,
+	 int inputSize,
+		/*
+		 * If endOnInput == endOnInputSize,
+		 * this value is the max size of Output Buffer.
+		 */
+	 int outputSize,
+	 /* endOnOutputSize, endOnInputSize */
+	 int endOnInput,
+	 /* full, partial */
+	 int partialDecoding,
+	 /* only used if partialDecoding == partial */
+	 int targetOutputSize,
+	 /* noDict, withPrefix64k, usingExtDict */
+	 int dict,
+	 /* == dest when no prefix */
+	 const BYTE * const lowPrefix,
+	 /* only if dict == usingExtDict */
+	 const BYTE * const dictStart,
+	 /* note : = 0 if noDict */
+	 const size_t dictSize
+	 )
 {
+	/* Local Variables */
 	const BYTE *ip = (const BYTE *) source;
-	const BYTE *ref;
+	const BYTE * const iend = ip + inputSize;
+
 	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + osize;
+	BYTE * const oend = op + outputSize;
 	BYTE *cpy;
-	unsigned token;
-	size_t length;
+	BYTE *oexit = op + targetOutputSize;
+	const BYTE * const lowLimit = lowPrefix - dictSize;
+
+	const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
+	const unsigned int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};
+	const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+
+	const int safeDecode = (endOnInput == endOnInputSize);
+	const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));
 
+	/* Special cases */
+	/* targetOutputSize too high => decode everything */
+	if ((partialDecoding) && (oexit > oend - MFLIMIT))
+		oexit = oend - MFLIMIT;
+
+	/* Empty output buffer */
+	if ((endOnInput) && (unlikely(outputSize == 0)))
+		return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;
+
+	if ((!endOnInput) && (unlikely(outputSize == 0)))
+		return (*ip == 0 ? 1 : -1);
+
+	/* Main Loop : decode sequences */
 	while (1) {
+		size_t length;
+		const BYTE *match;
+		size_t offset;
+
+		/* get literal length */
+		unsigned int const token = *ip++;
+
+		length = token>>ML_BITS;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
 		if (length == RUN_MASK) {
-			size_t len;
+			unsigned int s;
 
-			len = *ip++;
-			for (; len == 255; length += 255)
-				len = *ip++;
-			if (unlikely(length > (size_t)(length + len)))
+			do {
+				s = *ip++;
+				length += s;
+			} while (likely(endOnInput
+				? ip < iend - RUN_MASK
+				: 1) & (s == 255));
+
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)(op))) {
+				/* overflow detection */
 				goto _output_error;
-			length += len;
+			}
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(ip + length) < (size_t)(ip))) {
+				/* overflow detection */
+				goto _output_error;
+			}
 		}
 
 		/* copy literals */
 		cpy = op + length;
-		if (unlikely(cpy > oend - COPYLENGTH)) {
-			/*
-			 * Error: not enough place for another match
-			 * (min 4) + 5 literals
-			 */
-			if (cpy != oend)
-				goto _output_error;
-
+		if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
+			|| (ip + length > iend - (2 + 1 + LASTLITERALS))))
+			|| ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
+			if (partialDecoding) {
+				if (cpy > oend) {
+					/*
+					 * Error :
+					 * write attempt beyond end of output buffer
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& (ip + length > iend)) {
+					/*
+					 * Error :
+					 * read attempt beyond
+					 * end of input buffer
+					 */
+					goto _output_error;
+				}
+			} else {
+				if ((!endOnInput)
+					&& (cpy != oend)) {
+					/*
+					 * Error :
+					 * block decoding must
+					 * stop exactly there
+					 */
+					goto _output_error;
+				}
+				if ((endOnInput)
+					&& ((ip + length != iend)
+					|| (cpy > oend))) {
+					/*
+					 * Error :
+					 * input must be consumed
+					 */
+					goto _output_error;
+				}
+			}
 			memcpy(op, ip, length);
 			ip += length;
-			break; /* EOF */
+			op += length;
+			/* Necessarily EOF, due to parsing restrictions */
+			break;
 		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+		LZ4_wildCopy(op, ip, cpy);
+		ip += length; op = cpy;
 
 		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-
-		/* Error: offset create reference outside destination buffer */
-		if (unlikely(ref < (BYTE *const) dest))
+		offset = LZ4_readLE16(ip); ip += 2;
+		match = op - offset;
+		if ((checkOffset) && (unlikely(match < lowLimit))) {
+			/* Error : offset outside buffers */
 			goto _output_error;
+		}
+		/* costs ~1%; silence an msan warning when offset == 0 */
+		LZ4_write32(op, (U32)offset);
 
 		/* get matchlength */
 		length = token & ML_MASK;
 		if (length == ML_MASK) {
-			for (; *ip == 255; length += 255)
-				ip++;
-			if (unlikely(length > (size_t)(length + *ip)))
+			unsigned int s;
+
+			do {
+			s = *ip++;
+			if ((endOnInput) && (ip > iend - LASTLITERALS))
+				goto _output_error;
+			length += s;
+			} while (s == 255);
+			if ((safeDecode)
+				&& unlikely(
+					(size_t)(op + length) < (size_t)op)) {
+				/* overflow detection */
 				goto _output_error;
-			length += *ip++;
+			}
 		}
+		length += MINMATCH;
+
+		/* check external dictionary */
+		if ((dict == usingExtDict) && (match < lowPrefix)) {
+			if (unlikely(op + length > oend - LASTLITERALS)) {
+				/* doesn't respect parsing restriction */
+				goto _output_error;
+			}
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-			op[0] = ref[0];
-			op[1] = ref[1];
-			op[2] = ref[2];
-			op[3] = ref[3];
-			op += 4;
-			ref += 4;
-			ref -= dec32table[op-ref];
-			PUT4(ref, op);
-			op += STEPSIZE - 4;
-			ref -= dec64;
+			if (length <= (size_t)(lowPrefix - match)) {
+			/*
+			 * match can be copied as a single segment
+			 * from external dictionary
+			 */
+			memmove(op, dictEnd - (lowPrefix - match), length);
+			op += length;
+			} else {
+				/*
+				 * match encompass external
+				 * dictionary and current block
+				 */
+				size_t const copySize = (size_t)(lowPrefix - match);
+				size_t const restSize = length - copySize;
+
+				memcpy(op, dictEnd - copySize, copySize);
+				op += copySize;
+
+				if (restSize > (size_t)(op - lowPrefix)) {
+					/* overlap copy */
+					BYTE * const endOfMatch = op + restSize;
+					const BYTE *copyFrom = lowPrefix;
+
+					while (op < endOfMatch)
+						*op++ = *copyFrom++;
+				} else {
+					memcpy(op, lowPrefix, restSize);
+					op += restSize;
+				}
+			}
+			continue;
+		}
+
+		/* copy match within block */
+		cpy = op + length;
+		if (unlikely(offset < 8)) {
+			const int dec64 = dec64table[offset];
+
+			op[0] = match[0];
+			op[1] = match[1];
+			op[2] = match[2];
+			op[3] = match[3];
+			match += dec32table[offset];
+			memcpy(op + 4, match, 4);
+			match -= dec64;
 		} else {
-			LZ4_COPYSTEP(ref, op);
+			LZ4_copy8(op, match); match += 8;
 		}
-		cpy = op + length - (STEPSIZE - 4);
-		if (cpy > (oend - COPYLENGTH)) {
 
-			/* Error: request to write beyond destination buffer */
-			if (cpy > oend)
-				goto _output_error;
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
+		op += 8;
+
+		if (unlikely(cpy > oend - 12)) {
+			BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);
+
+			if (cpy > oend - LASTLITERALS) {
+				/*
+				 * Error : last LASTLITERALS bytes
+				 * must be literals (uncompressed)
+				 */
 				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
+			}
+			if (op < oCopyLimit) {
+				LZ4_wildCopy(op, match, oCopyLimit);
+				match += oCopyLimit - op;
+				op = oCopyLimit;
+			}
 			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
+				*op++ = *match++;
+		} else {
+			LZ4_copy8(op, match);
+			if (length > 16)
+				LZ4_wildCopy(op + 8, match + 8, cpy);
 		}
-		LZ4_SECURECOPY(ref, op, cpy);
 		op = cpy; /* correction */
 	}
+
 	/* end of decoding */
-	return (int) (((char *)ip) - source);
+	if (endOnInput) {
+		/* Nb of output bytes decoded */
+		return (int) (((char *)op) - dest);
+	} else {
+		/* Nb of input bytes read */
+		return (int) (((const char *)ip) - source);
+	}
 
-	/* write overflow error detected */
+	/* Overflow error detected */
 _output_error:
 	return -1;
 }
 
-static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
-				int isize, size_t maxoutputsize)
+int LZ4_decompress_safe(const char *source, char *dest,
+	int compressedSize, int maxDecompressedSize)
 {
-	const BYTE *ip = (const BYTE *) source;
-	const BYTE *const iend = ip + isize;
-	const BYTE *ref;
-
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, full, 0,
+		noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe);
 
-	BYTE *op = (BYTE *) dest;
-	BYTE * const oend = op + maxoutputsize;
-	BYTE *cpy;
+int LZ4_decompress_safe_partial(const char *source, char *dest,
+	int compressedSize, int targetOutputSize, int maxDecompressedSize)
+{
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxDecompressedSize, endOnInputSize, partial,
+		targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_partial);
 
-	/* Main Loop */
-	while (ip < iend) {
+int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
+{
+	return LZ4_decompress_generic(source, dest, 0, originalSize,
+		endOnOutputSize, full, 0, withPrefix64k,
+		(BYTE *)(dest - 64 * KB), NULL, 64 * KB);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast);
 
-		unsigned token;
-		size_t length;
+int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *dictionary, int dictSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode;
 
-		/* get runlength */
-		token = *ip++;
-		length = (token >> ML_BITS);
-		if (length == RUN_MASK) {
-			int s = 255;
-			while ((ip < iend) && (s == 255)) {
-				s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-			}
-		}
-		/* copy literals */
-		cpy = op + length;
-		if ((cpy > oend - COPYLENGTH) ||
-			(ip + length > iend - COPYLENGTH)) {
-
-			if (cpy > oend)
-				goto _output_error;/* writes beyond buffer */
-
-			if (ip + length != iend)
-				goto _output_error;/*
-						    * Error: LZ4 format requires
-						    * to consume all input
-						    * at this stage
-						    */
-			memcpy(op, ip, length);
-			op += length;
-			break;/* Necessarily EOF, due to parsing restrictions */
-		}
-		LZ4_WILDCOPY(ip, op, cpy);
-		ip -= (op - cpy);
-		op = cpy;
+	lz4sd->prefixSize = (size_t) dictSize;
+	lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize;
+	lz4sd->externalDict = NULL;
+	lz4sd->extDictSize	= 0;
+	return 1;
+}
+EXPORT_SYMBOL(LZ4_setStreamDecode);
 
-		/* get offset */
-		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
-		ip += 2;
-		if (ref < (BYTE * const) dest)
-			goto _output_error;
-			/*
-			 * Error : offset creates reference
-			 * outside of destination buffer
-			 */
+/*
+ * *_continue() :
+ * These decoding functions allow decompression of multiple blocks
+ * in "streaming" mode.
+ * Previously decoded blocks must still be available at the memory
+ * position where they were decoded.
+ * If it's not possible, save the relevant part of
+ * decoded data into a safe buffer,
+ * and indicate where it stands using LZ4_setStreamDecode()
+ */
+int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int compressedSize, int maxOutputSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize,
+			maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict,
+			lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += result;
+		lz4sd->prefixEnd	+= result;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize,
+			endOnInputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = result;
+		lz4sd->prefixEnd	= (BYTE *)dest + result;
+	}
 
-		/* get matchlength */
-		length = (token & ML_MASK);
-		if (length == ML_MASK) {
-			while (ip < iend) {
-				int s = *ip++;
-				if (unlikely(length > (size_t)(length + s)))
-					goto _output_error;
-				length += s;
-				if (s == 255)
-					continue;
-				break;
-			}
-		}
+	return result;
+}
+EXPORT_SYMBOL(LZ4_decompress_safe_continue);
 
-		/* copy repeated sequence */
-		if (unlikely((op - ref) < STEPSIZE)) {
-#if LZ4_ARCH64
-			int dec64 = dec64table[op - ref];
-#else
-			const int dec64 = 0;
-#endif
-				op[0] = ref[0];
-				op[1] = ref[1];
-				op[2] = ref[2];
-				op[3] = ref[3];
-				op += 4;
-				ref += 4;
-				ref -= dec32table[op - ref];
-				PUT4(ref, op);
-				op += STEPSIZE - 4;
-				ref -= dec64;
-		} else {
-			LZ4_COPYSTEP(ref, op);
-		}
-		cpy = op + length - (STEPSIZE-4);
-		if (cpy > oend - COPYLENGTH) {
-			if (cpy > oend)
-				goto _output_error; /* write outside of buf */
-#if LZ4_ARCH64
-			if ((ref + COPYLENGTH) > oend)
-#else
-			if ((ref + COPYLENGTH) > oend ||
-					(op + COPYLENGTH) > oend)
-#endif
-				goto _output_error;
-			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
-			while (op < cpy)
-				*op++ = *ref++;
-			op = cpy;
-			/*
-			 * Check EOF (should never happen, since last 5 bytes
-			 * are supposed to be literals)
-			 */
-			if (op == oend)
-				goto _output_error;
-			continue;
-		}
-		LZ4_SECURECOPY(ref, op, cpy);
-		op = cpy; /* correction */
+int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
+	const char *source, char *dest, int originalSize)
+{
+	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
+	int result;
+
+	if (lz4sd->prefixEnd == (BYTE *)dest) {
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict,
+			lz4sd->prefixEnd - lz4sd->prefixSize,
+			lz4sd->externalDict, lz4sd->extDictSize);
+
+		if (result <= 0)
+			return result;
+
+		lz4sd->prefixSize += originalSize;
+		lz4sd->prefixEnd	+= originalSize;
+	} else {
+		lz4sd->extDictSize = lz4sd->prefixSize;
+		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
+		result = LZ4_decompress_generic(source, dest, 0, originalSize,
+			endOnOutputSize, full, 0,
+			usingExtDict, (BYTE *)dest,
+			lz4sd->externalDict, lz4sd->extDictSize);
+		if (result <= 0)
+			return result;
+		lz4sd->prefixSize = originalSize;
+		lz4sd->prefixEnd	= (BYTE *)dest + originalSize;
 	}
-	/* end of decoding */
-	return (int) (((char *) op) - dest);
 
-	/* write overflow error detected */
-_output_error:
-	return -1;
+	return result;
 }
+EXPORT_SYMBOL(LZ4_decompress_fast_continue);
 
-int lz4_decompress(const unsigned char *src, size_t *src_len,
-		unsigned char *dest, size_t actual_dest_len)
+/*
+ * Advanced decoding functions :
+ * *_usingDict() :
+ * These decoding functions work the same as "_continue" ones,
+ * the dictionary must be explicitly provided within parameters
+ */
+static inline int LZ4_decompress_usingDict_generic(const char *source,
+	char *dest, int compressedSize, int maxOutputSize, int safe,
+	const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int input_len = 0;
-
-	input_len = lz4_uncompress(src, dest, actual_dest_len);
-	if (input_len < 0)
-		goto exit_0;
-	*src_len = input_len;
+	if (dictSize == 0)
+		return LZ4_decompress_generic(source, dest,
+			compressedSize, maxOutputSize, safe, full, 0,
+			noDict, (BYTE *)dest, NULL, 0);
+	if (dictStart + dictSize == dest) {
+		if (dictSize >= (int)(64 * KB - 1))
+			return LZ4_decompress_generic(source, dest,
+				compressedSize, maxOutputSize, safe, full, 0,
+				withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
+		return LZ4_decompress_generic(source, dest, compressedSize,
+			maxOutputSize, safe, full, 0, noDict,
+			(BYTE *)dest - dictSize, NULL, 0);
+	}
+	return LZ4_decompress_generic(source, dest, compressedSize,
+		maxOutputSize, safe, full, 0, usingExtDict,
+		(BYTE *)dest, (const BYTE *)dictStart, dictSize);
+}
 
-	return 0;
-exit_0:
-	return ret;
+int LZ4_decompress_safe_usingDict(const char *source, char *dest,
+	int compressedSize, int maxOutputSize,
+	const char *dictStart, int dictSize)
+{
+	return LZ4_decompress_usingDict_generic(source, dest,
+		compressedSize, maxOutputSize, 1, dictStart, dictSize);
 }
-#ifndef STATIC
-EXPORT_SYMBOL(lz4_decompress);
-#endif
+EXPORT_SYMBOL(LZ4_decompress_safe_usingDict);
 
-int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
-		unsigned char *dest, size_t *dest_len)
+int LZ4_decompress_fast_usingDict(const char *source, char *dest,
+	int originalSize, const char *dictStart, int dictSize)
 {
-	int ret = -1;
-	int out_len = 0;
-
-	out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
-					*dest_len);
-	if (out_len < 0)
-		goto exit_0;
-	*dest_len = out_len;
-
-	return 0;
-exit_0:
-	return ret;
+	return LZ4_decompress_usingDict_generic(source, dest, 0,
+		originalSize, 0, dictStart, dictSize);
+}
+EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4_decompress_unknownoutputsize(const unsigned char *src,
+	size_t src_len, unsigned char *dest, size_t *dest_len) {
+	 *dest_len = LZ4_decompress_safe(src, dest,
+		 (int)src_len, (int)((size_t)dest_len));
+
+		/*
+		 * Prior lz4_decompress_unknownoutputsize will return
+		 * 0 for success and a negative result for error
+		 * new LZ4_decompress_safe returns
+		 * - the length of data read on success
+		 * - and also a negative result on error
+		 * meaning when result > 0, we just return 0 here
+		 */
+		if (src_len > 0) {
+			return 0;
+		} else
+			return -1;
 }
-#ifndef STATIC
 EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
 
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+	unsigned char *dest, size_t actual_dest_len) {
+	*src_len = LZ4_decompress_fast(src, dest, (int)actual_dest_len);
+
+	/*
+	 * Prior lz4_decompress will return
+	 * 0 for success and a negative result for error
+	 * new LZ4_decompress_fast returns
+	 * - the length of data read on success
+	 * - and also a negative result on error
+	 * meaning when result > 0, we just return 0 here
+	 */
+	if (src_len > 0)
+		return 0;
+	else
+		return -1;
+}
+EXPORT_SYMBOL(lz4_decompress);
+
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4 Decompressor");
-#endif
+MODULE_DESCRIPTION("LZ4 decompressor");
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index c79d7ea..b0d21b5 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -1,157 +1,219 @@
+#ifndef __LZ4DEFS_H__
+#define __LZ4DEFS_H__
+
 /*
- * lz4defs.h -- architecture specific defines
- *
- * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ * lz4defs.h -- common and architecture specific defines for the kernel usage
+
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2016, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *    * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * You can contact the author at :
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
+#include <asm/unaligned.h>
+#include <linux/string.h>	 /* memset, memcpy */
+
 /*
  * Detects 64 bits mode
- */
+*/
 #if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
 #endif
 
-/*
- * Architecture-specific macros
- */
-#define BYTE	u8
-typedef struct _U16_S { u16 v; } U16_S;
-typedef struct _U32_S { u32 v; } U32_S;
-typedef struct _U64_S { u64 v; } U64_S;
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-
-#define A16(x) (((U16_S *)(x))->v)
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-#define PUT4(s, d) (A32(d) = A32(s))
-#define PUT8(s, d) (A64(d) = A64(s))
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - A16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
-	do {	\
-		A16(p) = v; \
-		p += 2; \
-	} while (0)
-#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
-
-#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
-#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
-#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
-
-#define PUT4(s, d) \
-	put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
-#define PUT8(s, d) \
-	put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
-
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
-	(d = s - get_unaligned_le16(p))
-
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)			\
-	do {						\
-		put_unaligned_le16(v, (u16 *)(p));	\
-		p += 2;					\
-	} while (0)
+/*-************************************
+ *	Basic Types
+ **************************************/
+#include <linux/types.h>
+
+typedef	uint8_t BYTE;
+typedef uint16_t U16;
+typedef uint32_t U32;
+typedef	int32_t S32;
+typedef uint64_t U64;
+typedef uintptr_t uptrval;
+
+#if defined(__x86_64__)
+	typedef U64		reg_t;	 /* 64-bits in x32 mode */
+#else
+	typedef size_t reg_t;	 /* 32-bits in x32 mode */
 #endif
 
-#define COPYLENGTH 8
-#define ML_BITS  4
-#define ML_MASK  ((1U << ML_BITS) - 1)
-#define RUN_BITS (8 - ML_BITS)
-#define RUN_MASK ((1U << RUN_BITS) - 1)
-#define MEMORY_USAGE	14
-#define MINMATCH	4
-#define SKIPSTRENGTH	6
-#define LASTLITERALS	5
-#define MFLIMIT		(COPYLENGTH + MINMATCH)
-#define MINLENGTH	(MFLIMIT + 1)
-#define MAXD_LOG	16
-#define MAXD		(1 << MAXD_LOG)
-#define MAXD_MASK	(u32)(MAXD - 1)
-#define MAX_DISTANCE	(MAXD - 1)
-#define HASH_LOG	(MAXD_LOG - 1)
-#define HASHTABLESIZE	(1 << HASH_LOG)
-#define MAX_NB_ATTEMPTS	256
-#define OPTIMAL_ML	(int)((ML_MASK-1)+MINMATCH)
-#define LZ4_64KLIMIT	((1<<16) + (MFLIMIT - 1))
-#define HASHLOG64K	((MEMORY_USAGE - 2) + 1)
-#define HASH64KTABLESIZE	(1U << HASHLOG64K)
-#define LZ4_HASH_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - (MEMORY_USAGE-2)))
-#define LZ4_HASH64K_VALUE(p)	(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASHLOG64K))
-#define HASH_VALUE(p)		(((A32(p)) * 2654435761U) >> \
-				((MINMATCH * 8) - HASH_LOG))
-
-#if LZ4_ARCH64/* 64-bit */
-#define STEPSIZE 8
-
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT8(s, d);	\
-		d += 8;		\
-		s += 8;		\
-	} while (0)
-
-#define LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
-
-#define LZ4_SECURECOPY(s, d, e)			\
-	do {					\
-		if (d < e) {			\
-			LZ4_WILDCOPY(s, d, e);	\
-		}				\
-	} while (0)
-#define HTYPE u32
-
-#ifdef __BIG_ENDIAN
-#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+/*-************************************
+ *	Constants
+ **************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS 5
+#define MFLIMIT (WILDCOPYLENGTH+MINMATCH)
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB (1<<10)
+#define MB (1<<20)
+#define GB (1U<<30)
+
+#define MAXD_LOG 16
+#define MAX_DISTANCE ((1<<MAXD_LOG) - 1)
+#define STEPSIZE sizeof(size_t)
+
+#define ML_BITS	4
+#define ML_MASK	((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;
+
+/*-************************************
+ *	Reading and writing into memory
+ **************************************/
+
+static inline U16 LZ4_read16(const void *memPtr)
+{
+		U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline U32 LZ4_read32(const void *memPtr)
+{
+		U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline size_t LZ4_read_ARCH(const void *memPtr)
+{
+		size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static inline void LZ4_write16(void *memPtr, U16 value)
+{
+		memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline void LZ4_write32(void *memPtr, U32 value)
+{
+		memcpy(memPtr, &value, sizeof(value));
+}
+
+static inline U16 LZ4_readLE16(const void *memPtr)
+{
+#ifdef __LITTLE_ENDIAN__
+		return LZ4_read16(memPtr);
 #else
-#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+		const BYTE *p = (const BYTE *)memPtr;
+
+		return (U16)((U16)p[0] + (p[1] << 8));
 #endif
+}
+
+static inline void LZ4_writeLE16(void *memPtr, U16 value)
+{
+#ifdef __LITTLE_ENDIAN__
+		LZ4_write16(memPtr, value);
+#else
+		BYTE *p = (BYTE *)memPtr;
 
-#else	/* 32-bit */
-#define STEPSIZE 4
+		p[0] = (BYTE) value;
+		p[1] = (BYTE)(value>>8);
+#endif
+}
 
-#define LZ4_COPYSTEP(s, d)	\
-	do {			\
-		PUT4(s, d);	\
-		d += 4;		\
-		s += 4;		\
-	} while (0)
+static inline void LZ4_copy8(void *dst, const void *src)
+{
+		memcpy(dst, src, 8);
+}
 
-#define LZ4_COPYPACKET(s, d)		\
-	do {				\
-		LZ4_COPYSTEP(s, d);	\
-		LZ4_COPYSTEP(s, d);	\
-	} while (0)
+/*
+ * customized variant of memcpy,
+ * which can overwrite up to 7 bytes beyond dstEnd
+ */
+static inline void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
+{
+		BYTE *d = (BYTE *)dstPtr;
+		const BYTE *s = (const BYTE *)srcPtr;
+		BYTE *const e = (BYTE *)dstEnd;
 
-#define LZ4_SECURECOPY	LZ4_WILDCOPY
-#define HTYPE const u8*
+		do { LZ4_copy8(d, s); d += 8; s += 8; } while (d < e);
+}
 
-#ifdef __BIG_ENDIAN
+#if LZ4_ARCH64
+#ifdef __BIG_ENDIAN__
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#endif
+#else
+#ifdef __BIG_ENDIAN__
 #define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
 #else
 #define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
 #endif
+#endif
 
+static inline unsigned int LZ4_count(const BYTE *pIn, const BYTE *pMatch,
+	const BYTE *pInLimit)
+{
+	const BYTE *const pStart = pIn;
+
+	while (likely(pIn < pInLimit-(STEPSIZE-1))) {
+		size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+
+		if (!diff) {
+			pIn += STEPSIZE;
+			pMatch += STEPSIZE;
+			continue;
+		}
+		pIn += LZ4_NBCOMMONBYTES(diff);
+		return (unsigned int)(pIn - pStart);
+	}
+
+#ifdef LZ4_ARCH64
+	if ((pIn < (pInLimit-3))
+		&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
+		pIn += 4; pMatch += 4;
+	}
 #endif
+	if ((pIn < (pInLimit-1))
+		&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
+		pIn += 2; pMatch += 2;
+	}
+	if ((pIn < pInLimit) && (*pMatch == *pIn))
+		pIn++;
+	return (unsigned int)(pIn - pStart);
+}
+
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
+typedef enum { byPtr, byU32, byU16 } tableType_t;
 
-#define LZ4_WILDCOPY(s, d, e)		\
-	do {				\
-		LZ4_COPYPACKET(s, d);	\
-	} while (d < e)
-
-#define LZ4_BLINDCOPY(s, d, l)	\
-	do {	\
-		u8 *e = (d) + l;	\
-		LZ4_WILDCOPY(s, d, e);	\
-		d = e;	\
-	} while (0)
+typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
+typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
+
+typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
+typedef enum { full = 0, partial = 1 } earlyEnd_directive;
+
+#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index f344f76..407fd33 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -1,19 +1,17 @@
 /*
  * LZ4 HC - High Compression Mode of LZ4
- * Copyright (C) 2011-2012, Yann Collet.
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ * Copyright (C) 2011-2015, Yann Collet.
  *
+ * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
- *
- *     * Redistributions of source code must retain the above copyright
+ *		* Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
+ *		* Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
- *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -25,323 +23,353 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
  * You can contact the author at :
- * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
- * - LZ4 source repository : http://code.google.com/p/lz4/
+ *	- LZ4 homepage : http://www.lz4.org
+ *	- LZ4 source repository : https://github.com/lz4/lz4
  *
- *  Changed for kernel use by:
- *  Chanho Min <chanho.min@lge.com>
+ *	Changed for kernel usage by:
+ *	Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
  */
 
-#include <linux/module.h>
-#include <linux/kernel.h>
+/*-************************************
+ *	Dependencies
+ **************************************/
 #include <linux/lz4.h>
-#include <asm/unaligned.h>
 #include "lz4defs.h"
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h> /* memset */
 
-struct lz4hc_data {
-	const u8 *base;
-	HTYPE hashtable[HASHTABLESIZE];
-	u16 chaintable[MAXD];
-	const u8 *nexttoupdate;
-} __attribute__((__packed__));
+/* *************************************
+ *	Local Constants and types
+ ***************************************/
 
-static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
+
+#define HASH_FUNCTION(i)	(((i) * 2654435761U) \
+	>> ((MINMATCH*8) - LZ4HC_HASH_LOG))
+#define DELTANEXTU16(p)	chainTable[(U16)(p)] /* faster */
+
+static U32 LZ4HC_hashPtr(const void *ptr)
 {
-	memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
-	memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
-
-#if LZ4_ARCH64
-	hc4->nexttoupdate = base + 1;
-#else
-	hc4->nexttoupdate = base;
-#endif
-	hc4->base = base;
-	return 1;
+	return HASH_FUNCTION(LZ4_read32(ptr));
+}
+
+/**************************************
+ *	HC Compression
+ **************************************/
+static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
+{
+	memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
+	memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
+	hc4->nextToUpdate = 64 * KB;
+	hc4->base = start - 64 * KB;
+	hc4->end = start;
+	hc4->dictBase = start - 64 * KB;
+	hc4->dictLimit = 64 * KB;
+	hc4->lowLimit = 64 * KB;
 }
 
 /* Update chains up to ip (excluded) */
-static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+static inline void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
+	const BYTE *ip)
 {
-	u16 *chaintable = hc4->chaintable;
-	HTYPE *hashtable  = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const hashTable	= hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
+	U32 const target = (U32)(ip - base);
+	U32 idx = hc4->nextToUpdate;
+
+	while (idx < target) {
+		U32 const h = LZ4HC_hashPtr(base + idx);
+		size_t delta = idx - hashTable[h];
 
-	while (hc4->nexttoupdate < ip) {
-		const u8 *p = hc4->nexttoupdate;
-		size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
 		if (delta > MAX_DISTANCE)
 			delta = MAX_DISTANCE;
-		chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
-		hashtable[HASH_VALUE(p)] = (p) - base;
-		hc4->nexttoupdate++;
-	}
-}
 
-static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
-		const u8 *const matchlimit)
-{
-	const u8 *p1t = p1;
-
-	while (p1t < matchlimit - (STEPSIZE - 1)) {
-#if LZ4_ARCH64
-		u64 diff = A64(p2) ^ A64(p1t);
-#else
-		u32 diff = A32(p2) ^ A32(p1t);
-#endif
-		if (!diff) {
-			p1t += STEPSIZE;
-			p2 += STEPSIZE;
-			continue;
-		}
-		p1t += LZ4_NBCOMMONBYTES(diff);
-		return p1t - p1;
-	}
-#if LZ4_ARCH64
-	if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
-		p1t += 4;
-		p2 += 4;
-	}
-#endif
+		DELTANEXTU16(idx) = (U16)delta;
 
-	if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
-		p1t += 2;
-		p2 += 2;
+		hashTable[h] = idx;
+		idx++;
 	}
-	if ((p1t < matchlimit) && (*p2 == *p1t))
-		p1t++;
-	return p1t - p1;
+
+	hc4->nextToUpdate = target;
 }
 
-static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
-		const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+static inline int LZ4HC_InsertAndFindBestMatch(
+	LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
+	const BYTE *ip,
+	const BYTE * const iLimit,
+	const BYTE **matchpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-	const u8 *ref;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	int nbattempts = MAX_NB_ATTEMPTS;
-	size_t repl = 0, ml = 0;
-	u16 delta;
+	const BYTE * const dictBase = hc4->dictBase;
+	const U32 dictLimit = hc4->dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	U32 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	size_t ml = 0;
 
 	/* HC4 match finder */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	/* potential repetition */
-	if (ref >= ip-4) {
-		/* confirmed */
-		if (A32(ref) == A32(ip)) {
-			delta = (u16)(ip-ref);
-			repl = ml  = lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
-			*matchpos = ref;
-		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE * const match = base + matchIndex;
+
+			if (*(match + ml) == *(ip + ml)
+				&& (LZ4_read32(match) == LZ4_read32(ip))) {
+				size_t const mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, iLimit) + MINMATCH;
 
-	while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
-		nbattempts--;
-		if (*(ref + ml) == *(ip + ml)) {
-			if (A32(ref) == A32(ip)) {
-				size_t mlt =
-					lz4hc_commonlength(ip + MINMATCH,
-					ref + MINMATCH, matchlimit) + MINMATCH;
 				if (mlt > ml) {
 					ml = mlt;
-					*matchpos = ref;
+					*matchpos = match;
+				}
+			}
+		} else {
+			const BYTE * const match = dictBase + matchIndex;
+
+			if (LZ4_read32(match) == LZ4_read32(ip)) {
+				size_t mlt;
+				const BYTE *vLimit = ip
+					+ (dictLimit - matchIndex);
+
+				if (vLimit > iLimit)
+					vLimit = iLimit;
+				mlt = LZ4_count(ip + MINMATCH,
+					match + MINMATCH, vLimit) + MINMATCH;
+				if ((ip + mlt == vLimit)
+					&& (vLimit < iLimit))
+					mlt += LZ4_count(ip + mlt,
+						base + dictLimit,
+						iLimit);
+				if (mlt > ml) {
+					/* virtual matchpos */
+					ml = mlt;
+					*matchpos = base + matchIndex;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
-	}
-
-	/* Complete table */
-	if (repl) {
-		const BYTE *ptr = ip;
-		const BYTE *end;
-		end = ip + repl - (MINMATCH-1);
-		/* Pre-Load */
-		while (ptr < end - delta) {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			ptr++;
-		}
-		do {
-			chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
-			/* Head of chain */
-			hashtable[HASH_VALUE(ptr)] = (ptr) - base;
-			ptr++;
-		} while (ptr < end);
-		hc4->nexttoupdate = end;
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
 
 	return (int)ml;
 }
 
-static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
-	const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
-	const u8 **matchpos, const u8 **startpos)
+static inline int LZ4HC_InsertAndGetWiderMatch(
+	LZ4HC_CCtx_internal *hc4,
+	const BYTE * const ip,
+	const BYTE * const iLowLimit,
+	const BYTE * const iHighLimit,
+	int longest,
+	const BYTE **matchpos,
+	const BYTE **startpos,
+	const int maxNbAttempts)
 {
-	u16 *const chaintable = hc4->chaintable;
-	HTYPE *const hashtable = hc4->hashtable;
-#if LZ4_ARCH64
+	U16 * const chainTable = hc4->chainTable;
+	U32 * const HashTable = hc4->hashTable;
 	const BYTE * const base = hc4->base;
-#else
-	const int base = 0;
-#endif
-	const u8 *ref;
-	int nbattempts = MAX_NB_ATTEMPTS;
-	int delta = (int)(ip - startlimit);
+	const U32 dictLimit = hc4->dictLimit;
+	const BYTE * const lowPrefixPtr = base + dictLimit;
+	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
+		? hc4->lowLimit
+		: (U32)(ip - base) - (64 * KB - 1);
+	const BYTE * const dictBase = hc4->dictBase;
+	U32	 matchIndex;
+	int nbAttempts = maxNbAttempts;
+	int delta = (int)(ip - iLowLimit);
 
 	/* First Match */
-	lz4hc_insert(hc4, ip);
-	ref = hashtable[HASH_VALUE(ip)] + base;
-
-	while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
-		&& (nbattempts)) {
-		nbattempts--;
-		if (*(startlimit + longest) == *(ref - delta + longest)) {
-			if (A32(ref) == A32(ip)) {
-				const u8 *reft = ref + MINMATCH;
-				const u8 *ipt = ip + MINMATCH;
-				const u8 *startt = ip;
-
-				while (ipt < matchlimit-(STEPSIZE - 1)) {
-					#if LZ4_ARCH64
-					u64 diff = A64(reft) ^ A64(ipt);
-					#else
-					u32 diff = A32(reft) ^ A32(ipt);
-					#endif
-
-					if (!diff) {
-						ipt += STEPSIZE;
-						reft += STEPSIZE;
-						continue;
+	LZ4HC_Insert(hc4, ip);
+	matchIndex = HashTable[LZ4HC_hashPtr(ip)];
+
+	while ((matchIndex >= lowLimit)
+		&& (nbAttempts)) {
+		nbAttempts--;
+		if (matchIndex >= dictLimit) {
+			const BYTE *matchPtr = base + matchIndex;
+
+			if (*(iLowLimit + longest)
+				== *(matchPtr - delta + longest)) {
+				if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+					int mlt = MINMATCH +
+						LZ4_count(ip + MINMATCH,
+							matchPtr + MINMATCH,
+							iHighLimit);
+					int back = 0;
+
+					while ((ip + back > iLowLimit)
+							 && (matchPtr + back > lowPrefixPtr)
+							 && (ip[back - 1] == matchPtr[back - 1]))
+						back--;
+
+					mlt -= back;
+
+					if (mlt > longest) {
+						longest = (int)mlt;
+						*matchpos = matchPtr + back;
+						*startpos = ip + back;
 					}
-					ipt += LZ4_NBCOMMONBYTES(diff);
-					goto _endcount;
-				}
-				#if LZ4_ARCH64
-				if ((ipt < (matchlimit - 3))
-					&& (A32(reft) == A32(ipt))) {
-					ipt += 4;
-					reft += 4;
-				}
-				ipt += 2;
-				#endif
-				if ((ipt < (matchlimit - 1))
-					&& (A16(reft) == A16(ipt))) {
-					reft += 2;
 				}
-				if ((ipt < matchlimit) && (*reft == *ipt))
-					ipt++;
-_endcount:
-				reft = ref;
-
-				while ((startt > startlimit)
-					&& (reft > hc4->base)
-					&& (startt[-1] == reft[-1])) {
-					startt--;
-					reft--;
-				}
-
-				if ((ipt - startt) > longest) {
-					longest = (int)(ipt - startt);
-					*matchpos = reft;
-					*startpos = startt;
+			}
+		} else {
+			const BYTE * const matchPtr = dictBase + matchIndex;
+
+			if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
+				size_t mlt;
+				int back = 0;
+				const BYTE *vLimit = ip + (dictLimit - matchIndex);
+
+				if (vLimit > iHighLimit)
+					vLimit = iHighLimit;
+
+				mlt = LZ4_count(ip + MINMATCH,
+					matchPtr + MINMATCH, vLimit) + MINMATCH;
+
+				if ((ip + mlt == vLimit) && (vLimit < iHighLimit))
+					mlt += LZ4_count(ip + mlt, base + dictLimit,
+						iHighLimit);
+				while ((ip + back > iLowLimit)
+					&& (matchIndex + back > lowLimit)
+					&& (ip[back - 1] == matchPtr[back - 1]))
+					back--;
+				mlt -= back;
+				if ((int)mlt > longest) {
+					longest = (int)mlt;
+					*matchpos = base + matchIndex + back;
+					*startpos = ip + back;
 				}
 			}
 		}
-		ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+		matchIndex -= DELTANEXTU16(matchIndex);
 	}
+
 	return longest;
 }
 
-static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
-		int ml, const u8 *ref)
+static inline int LZ4HC_encodeSequence(
+	const BYTE **ip,
+	BYTE **op,
+	const BYTE **anchor,
+	int matchLength,
+	const BYTE * const match,
+	limitedOutput_directive limitedOutputBuffer,
+	BYTE *oend)
 {
-	int length, len;
-	u8 *token;
+	int length;
+	BYTE *token;
 
 	/* Encode Literal length */
 	length = (int)(*ip - *anchor);
 	token = (*op)++;
+
+	if ((limitedOutputBuffer)
+		&& ((*op + (length>>8)
+			+ length + (2 + 1 + LASTLITERALS)) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
 	if (length >= (int)RUN_MASK) {
-		*token = (RUN_MASK << ML_BITS);
+		int len;
+
+		*token = (RUN_MASK<<ML_BITS);
 		len = length - RUN_MASK;
-		for (; len > 254 ; len -= 255)
+		for (; len > 254 ; len -= 255) {
 			*(*op)++ = 255;
-		*(*op)++ = (u8)len;
+			*(*op)++ = (BYTE)len;
+		}
 	} else
-		*token = (length << ML_BITS);
+		*token = (BYTE)(length<<ML_BITS);
 
 	/* Copy Literals */
-	LZ4_BLINDCOPY(*anchor, *op, length);
+	LZ4_wildCopy(*op, *anchor, (*op) + length);
+	*op += length;
 
 	/* Encode Offset */
-	LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
+	LZ4_writeLE16(*op, (U16)(*ip - match)); *op += 2;
 
 	/* Encode MatchLength */
-	len = (int)(ml - MINMATCH);
-	if (len >= (int)ML_MASK) {
+	length = (int)(matchLength - MINMATCH);
+	if ((limitedOutputBuffer)
+		&& (*op + (length>>8)
+			+ (1 + LASTLITERALS) > oend)) {
+		/* Check output limit */
+		return 1;
+	}
+	if (length >= (int)ML_MASK) {
 		*token += ML_MASK;
-		len -= ML_MASK;
-		for (; len > 509 ; len -= 510) {
+		length -= ML_MASK;
+		for (; length > 509 ; length -= 510) {
 			*(*op)++ = 255;
 			*(*op)++ = 255;
 		}
-		if (len > 254) {
-			len -= 255;
+		if (length > 254) {
+			length -= 255;
 			*(*op)++ = 255;
 		}
-		*(*op)++ = (u8)len;
+		*(*op)++ = (BYTE)length;
 	} else
-		*token += len;
+		*token += (BYTE)(length);
 
 	/* Prepare next loop */
-	*ip += ml;
+	*ip += matchLength;
 	*anchor = *ip;
 
 	return 0;
 }
 
-static int lz4_compresshcctx(struct lz4hc_data *ctx,
-		const char *source,
-		char *dest,
-		int isize)
+static int LZ4HC_compress_generic(
+	LZ4HC_CCtx_internal *const ctx,
+	const char * const source,
+	char * const dest,
+	int const inputSize,
+	int const maxOutputSize,
+	int compressionLevel,
+	limitedOutput_directive limit
+	)
 {
-	const u8 *ip = (const u8 *)source;
-	const u8 *anchor = ip;
-	const u8 *const iend = ip + isize;
-	const u8 *const mflimit = iend - MFLIMIT;
-	const u8 *const matchlimit = (iend - LASTLITERALS);
+	const BYTE *ip = (const BYTE *) source;
+	const BYTE *anchor = ip;
+	const BYTE * const iend = ip + inputSize;
+	const BYTE * const mflimit = iend - MFLIMIT;
+	const BYTE * const matchlimit = (iend - LASTLITERALS);
 
-	u8 *op = (u8 *)dest;
+	BYTE *op = (BYTE *) dest;
+	BYTE * const oend = op + maxOutputSize;
 
+	unsigned int maxNbAttempts;
 	int ml, ml2, ml3, ml0;
-	const u8 *ref = NULL;
-	const u8 *start2 = NULL;
-	const u8 *ref2 = NULL;
-	const u8 *start3 = NULL;
-	const u8 *ref3 = NULL;
-	const u8 *start0;
-	const u8 *ref0;
-	int lastrun;
+	const BYTE *ref = NULL;
+	const BYTE *start2 = NULL;
+	const BYTE *ref2 = NULL;
+	const BYTE *start3 = NULL;
+	const BYTE *ref3 = NULL;
+	const BYTE *start0;
+	const BYTE *ref0;
+
+	/* init */
+	if (compressionLevel > LZ4HC_MAX_CLEVEL)
+		compressionLevel = LZ4HC_MAX_CLEVEL;
+	if (compressionLevel < 1)
+		compressionLevel = LZ4HC_DEFAULT_CLEVEL;
+	maxNbAttempts = 1 << (compressionLevel - 1);
+	ctx->end += inputSize;
 
 	ip++;
 
 	/* Main Loop */
 	while (ip < mflimit) {
-		ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
+		ml = LZ4HC_InsertAndFindBestMatch(ctx, ip,
+			matchlimit, (&ref), maxNbAttempts);
 		if (!ml) {
 			ip++;
 			continue;
@@ -351,46 +379,52 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		start0 = ip;
 		ref0 = ref;
 		ml0 = ml;
-_search2:
-		if (ip+ml < mflimit)
-			ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
-				ip + 1, matchlimit, ml, &ref2, &start2);
+
+_Search2:
+		if (ip + ml < mflimit)
+			ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				ip + ml - 2, ip + 0,
+				matchlimit, ml, &ref2,
+				&start2, maxNbAttempts);
 		else
 			ml2 = ml;
-		/* No better match */
+
 		if (ml2 == ml) {
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			/* No better match */
+			if (LZ4HC_encodeSequence(&ip, &op,
+				&anchor, ml, ref, limit, oend))
+				return 0;
 			continue;
 		}
 
 		if (start0 < ip) {
-			/* empirical */
 			if (start2 < ip + ml0) {
+				/* empirical */
 				ip = start0;
 				ref = ref0;
 				ml = ml0;
 			}
 		}
-		/*
-		 * Here, start0==ip
-		 * First Match too small : removed
-		 */
+
+		/* Here, start0 == ip */
 		if ((start2 - ip) < 3) {
+			/* First Match too small : removed */
 			ml = ml2;
 			ip = start2;
 			ref = ref2;
-			goto _search2;
+			goto _Search2;
 		}
 
-_search3:
+_Search3:
 		/*
-		 * Currently we have :
-		 * ml2 > ml1, and
-		 * ip1+3 <= ip2 (usually < ip1+ml1)
-		 */
+		* Currently we have :
+		* ml2 > ml1, and
+		* ip1 + 3 <= ip2 (usually < ip1 + ml1)
+		*/
 		if ((start2 - ip) < OPTIMAL_ML) {
 			int correction;
 			int new_ml = ml;
+
 			if (new_ml > OPTIMAL_ML)
 				new_ml = OPTIMAL_ML;
 			if (ip + new_ml > start2 + ml2 - MINMATCH)
@@ -403,39 +437,44 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			}
 		}
 		/*
-		 * Now, we have start2 = ip+new_ml,
-		 * with new_ml=min(ml, OPTIMAL_ML=18)
+		 * Now, we have start2 = ip + new_ml,
+		 * with new_ml = min(ml, OPTIMAL_ML = 18)
 		 */
+
 		if (start2 + ml2 < mflimit)
-			ml3 = lz4hc_insertandgetwidermatch(ctx,
-				start2 + ml2 - 3, start2, matchlimit,
-				ml2, &ref3, &start3);
+			ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+				start2 + ml2 - 3, start2,
+				matchlimit, ml2, &ref3, &start3,
+				maxNbAttempts);
 		else
 			ml3 = ml2;
 
-		/* No better match : 2 sequences to encode */
 		if (ml3 == ml2) {
+			/* No better match : 2 sequences to encode */
 			/* ip & ref are known; Now for ml */
-			if (start2 < ip+ml)
+			if (start2 < ip + ml)
 				ml = (int)(start2 - ip);
-
 			/* Now, encode 2 sequences */
-			lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml, ref, limit, oend))
+				return 0;
 			ip = start2;
-			lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
+			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+				ml2, ref2, limit, oend))
+				return 0;
 			continue;
 		}
 
-		/* Not enough space for match 2 : remove it */
 		if (start3 < ip + ml + 3) {
-			/*
-			 * can write Seq1 immediately ==> Seq2 is removed,
-			 * so Seq3 becomes Seq1
-			 */
+			/* Not enough space for match 2 : remove it */
 			if (start3 >= (ip + ml)) {
+				/* can write Seq1 immediately
+				 * ==> Seq2 is removed,
+				 * so Seq3 becomes Seq1
+				 */
 				if (start2 < ip + ml) {
-					int correction =
-						(int)(ip + ml - start2);
+					int correction = (int)(ip + ml - start2);
+
 					start2 += correction;
 					ref2 += correction;
 					ml2 -= correction;
@@ -446,35 +485,38 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 					}
 				}
 
-				lz4_encodesequence(&ip, &op, &anchor, ml, ref);
-				ip  = start3;
+				if (LZ4HC_encodeSequence(&ip, &op, &anchor,
+					ml, ref, limit, oend))
+					return 0;
+				ip	= start3;
 				ref = ref3;
-				ml  = ml3;
+				ml	= ml3;
 
 				start0 = start2;
 				ref0 = ref2;
 				ml0 = ml2;
-				goto _search2;
+				goto _Search2;
 			}
 
 			start2 = start3;
 			ref2 = ref3;
 			ml2 = ml3;
-			goto _search3;
+			goto _Search3;
 		}
 
 		/*
-		 * OK, now we have 3 ascending matches; let's write at least
-		 * the first one ip & ref are known; Now for ml
-		 */
+		* OK, now we have 3 ascending matches;
+		* let's write at least the first one
+		* ip & ref are known; Now for ml
+		*/
 		if (start2 < ip + ml) {
 			if ((start2 - ip) < (int)ML_MASK) {
 				int correction;
+
 				if (ml > OPTIMAL_ML)
 					ml = OPTIMAL_ML;
 				if (ip + ml > start2 + ml2 - MINMATCH)
-					ml = (int)(start2 - ip) + ml2
-						- MINMATCH;
+					ml = (int)(start2 - ip) + ml2 - MINMATCH;
 				correction = ml - (int)(start2 - ip);
 				if (correction > 0) {
 					start2 += correction;
@@ -484,7 +526,9 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 			} else
 				ml = (int)(start2 - ip);
 		}
-		lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+		if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml,
+			ref, limit, oend))
+			return 0;
 
 		ip = start2;
 		ref = ref2;
@@ -494,46 +538,243 @@ static int lz4_compresshcctx(struct lz4hc_data *ctx,
 		ref2 = ref3;
 		ml2 = ml3;
 
-		goto _search3;
+		goto _Search3;
 	}
 
 	/* Encode Last Literals */
-	lastrun = (int)(iend - anchor);
-	if (lastrun >= (int)RUN_MASK) {
-		*op++ = (RUN_MASK << ML_BITS);
-		lastrun -= RUN_MASK;
-		for (; lastrun > 254 ; lastrun -= 255)
-			*op++ = 255;
-		*op++ = (u8) lastrun;
-	} else
-		*op++ = (lastrun << ML_BITS);
-	memcpy(op, anchor, iend - anchor);
-	op += iend - anchor;
+	{ int lastRun = (int)(iend - anchor);
+
+		if ((limit)
+			&& (((char *)op - dest) + lastRun + 1
+				+ ((lastRun + 255 - RUN_MASK)/255)
+					> (U32)maxOutputSize)) {
+			/* Check output limit */
+			return 0;
+		}
+		if (lastRun >= (int)RUN_MASK) {
+			*op++ = (RUN_MASK<<ML_BITS);
+			lastRun -= RUN_MASK;
+			for (; lastRun > 254 ; lastRun -= 255) {
+				*op++ = 255;
+				*op++ = (BYTE) lastRun;
+			}
+		} else
+			*op++ = (BYTE)(lastRun<<ML_BITS);
+		memcpy(op, anchor, iend - anchor);
+		op += iend - anchor;
+	}
+
 	/* End */
 	return (int) (((char *)op) - dest);
 }
 
-int lz4hc_compress(const unsigned char *src, size_t src_len,
-			unsigned char *dst, size_t *dst_len, void *wrkmem)
+int LZ4_compress_HC_extStateHC(
+	void *state,
+	const char *src,
+	char *dst,
+	int srcSize,
+	int maxDstSize,
+	int compressionLevel)
 {
-	int ret = -1;
-	int out_len = 0;
+	LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
 
-	struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
-	lz4hc_init(hc4, (const u8 *)src);
-	out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
-		(char *)dst, (int)src_len);
+	if (((size_t)(state)&(sizeof(void *) - 1)) != 0) {
+		/* Error : state is not aligned
+		 * for pointers (32 or 64 bits)
+		 */
+		return 0;
+	}
 
-	if (out_len < 0)
-		goto exit;
+	LZ4HC_init(ctx, (const BYTE *)src);
 
-	*dst_len = out_len;
-	return 0;
+	if (maxDstSize < LZ4_compressBound(srcSize))
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, limitedOutput);
+	else
+		return LZ4HC_compress_generic(ctx, src, dst,
+			srcSize, maxDstSize, compressionLevel, noLimit);
+}
 
-exit:
-	return ret;
+int LZ4_compress_HC(const char *src, char *dst, int srcSize,
+	int maxDstSize, int compressionLevel, void *wrkmem)
+{
+	return LZ4_compress_HC_extStateHC(wrkmem, src, dst,
+		srcSize, maxDstSize, compressionLevel);
+}
+EXPORT_SYMBOL(LZ4_compress_HC);
+
+/*-******************************
+ *	For backwards compatibility
+ ********************************/
+int lz4hc_compress(const unsigned char *src, size_t src_len,
+	unsigned char *dst, size_t *dst_len, void *wrkmem)
+{
+	*dst_len = LZ4_compress_HC(src, dst, (int)src_len,
+		(int)((size_t)dst_len), LZ4HC_DEFAULT_CLEVEL, wrkmem);
+
+	/*
+	 * Prior lz4hc_compress will return -1 in case of error
+	 * and 0 on success
+	 * while new LZ4_compress_HC
+	 * returns 0 in case of error
+	 * and the output length on success
+	 */
+	if (!dst_len)
+		return -1;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(lz4hc_compress);
 
+/**************************************
+ *	Streaming Functions
+ **************************************/
+void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel)
+{
+	LZ4_streamHCPtr->internal_donotuse.base = NULL;
+	LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel;
+}
+
+int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *dictionary,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	if (dictSize > 64 * KB) {
+		dictionary += dictSize - 64 * KB;
+		dictSize = 64 * KB;
+	}
+	LZ4HC_init(ctxPtr, (const BYTE *)dictionary);
+	if (dictSize >= 4)
+		LZ4HC_Insert(ctxPtr, (const BYTE *)dictionary + (dictSize - 3));
+	ctxPtr->end = (const BYTE *)dictionary + dictSize;
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_loadDictHC);
+
+/* compression */
+
+static void LZ4HC_setExternalDict(
+	LZ4HC_CCtx_internal *ctxPtr,
+	const BYTE *newBlock)
+{
+	if (ctxPtr->end >= ctxPtr->base + 4) {
+		/* Referencing remaining dictionary content */
+		LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
+	}
+
+	/*
+	 * Only one memory segment for extDict,
+	 * so any previous extDict is lost at this stage
+	 */
+	ctxPtr->lowLimit	= ctxPtr->dictLimit;
+	ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
+	ctxPtr->dictBase	= ctxPtr->base;
+	ctxPtr->base = newBlock - ctxPtr->dictLimit;
+	ctxPtr->end	= newBlock;
+	/* match referencing will resume from there */
+	ctxPtr->nextToUpdate = ctxPtr->dictLimit;
+}
+EXPORT_SYMBOL(LZ4HC_setExternalDict);
+
+static int LZ4_compressHC_continue_generic(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize,
+	limitedOutput_directive limit)
+{
+	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
+
+	/* auto - init if forgotten */
+	if (ctxPtr->base == NULL)
+		LZ4HC_init(ctxPtr, (const BYTE *) source);
+
+	/* Check overflow */
+	if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) {
+		size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base)
+			- ctxPtr->dictLimit;
+		if (dictSize > 64 * KB)
+			dictSize = 64 * KB;
+		LZ4_loadDictHC(LZ4_streamHCPtr,
+			(const char *)(ctxPtr->end) - dictSize, (int)dictSize);
+	}
+
+	/* Check if blocks follow each other */
+	if ((const BYTE *)source != ctxPtr->end)
+		LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source);
+
+	/* Check overlapping input/dictionary space */
+	{ const BYTE *sourceEnd = (const BYTE *) source + inputSize;
+		const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
+		const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;
+
+		if ((sourceEnd > dictBegin)
+			&& ((const BYTE *)source < dictEnd)) {
+			if (sourceEnd > dictEnd)
+				sourceEnd = dictEnd;
+			ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);
+
+			if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4)
+				ctxPtr->lowLimit = ctxPtr->dictLimit;
+		}
+	}
+
+	return LZ4HC_compress_generic(ctxPtr, source, dest,
+		inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
+}
+
+int LZ4_compress_HC_continue(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	const char *source,
+	char *dest,
+	int inputSize,
+	int maxOutputSize)
+{
+	if (maxOutputSize < LZ4_compressBound(inputSize))
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, limitedOutput);
+	else
+		return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
+			source, dest, inputSize, maxOutputSize, noLimit);
+}
+EXPORT_SYMBOL(LZ4_compress_HC_continue);
+
+/* dictionary saving */
+
+int LZ4_saveDictHC(
+	LZ4_streamHC_t *LZ4_streamHCPtr,
+	char *safeBuffer,
+	int dictSize)
+{
+	LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
+	int const prefixSize = (int)(streamPtr->end
+		- (streamPtr->base + streamPtr->dictLimit));
+
+	if (dictSize > 64 * KB)
+		dictSize = 64 * KB;
+	if (dictSize < 4)
+		dictSize = 0;
+	if (dictSize > prefixSize)
+		dictSize = prefixSize;
+
+	memmove(safeBuffer, streamPtr->end - dictSize, dictSize);
+
+	{ U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);
+
+		streamPtr->end = (const BYTE *)safeBuffer + dictSize;
+		streamPtr->base = streamPtr->end - endIndex;
+		streamPtr->dictLimit = endIndex - dictSize;
+		streamPtr->lowLimit = endIndex - dictSize;
+
+		if (streamPtr->nextToUpdate < streamPtr->dictLimit)
+			streamPtr->nextToUpdate = streamPtr->dictLimit;
+	}
+	return dictSize;
+}
+EXPORT_SYMBOL(LZ4_saveDictHC);
+
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("LZ4HC compressor");
+MODULE_DESCRIPTION("LZ4 HC compressor");
-- 
2.1.4

^ permalink raw reply related

* [PATCH 2/4] lib/decompress_unlz4: Change module to work with new LZ4 module version
From: Sven Schmidt @ 2017-01-21 15:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt
In-Reply-To: <1485011351-7463-1-git-send-email-4sschmid@informatik.uni-hamburg.de>

This patch updates the unlz4 wrapper to work with the
updated LZ4 kernel module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 lib/decompress_unlz4.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
index 036fc88..1b0baf3 100644
--- a/lib/decompress_unlz4.c
+++ b/lib/decompress_unlz4.c
@@ -72,7 +72,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 		error("NULL input pointer and missing fill function");
 		goto exit_1;
 	} else {
-		inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+		inp = large_malloc(LZ4_compressBound(uncomp_chunksize));
 		if (!inp) {
 			error("Could not allocate input buffer");
 			goto exit_1;
@@ -136,7 +136,7 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			inp += 4;
 			size -= 4;
 		} else {
-			if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+			if (chunksize > LZ4_compressBound(uncomp_chunksize)) {
 				error("chunk length is longer than allocated");
 				goto exit_2;
 			}
@@ -152,11 +152,14 @@ STATIC inline int INIT unlz4(u8 *input, long in_len,
 			out_len -= dest_len;
 		} else
 			dest_len = out_len;
-		ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+
+		ret = LZ4_decompress_fast(inp, outp, dest_len);
+		chunksize = ret;
 #else
 		dest_len = uncomp_chunksize;
-		ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
-				&dest_len);
+
+		ret = LZ4_decompress_safe(inp, outp, chunksize, dest_len);
+		dest_len = ret;
 #endif
 		if (ret < 0) {
 			error("Decoding failed");
-- 
2.1.4

^ permalink raw reply related

* [PATCH 3/4] crypto: Change LZ4 modules to work with new LZ4 module version
From: Sven Schmidt @ 2017-01-21 15:09 UTC (permalink / raw)
  To: akpm
  Cc: bongkyu.kim, rsalvaterra, sergey.senozhatsky, gregkh,
	linux-kernel, herbert, davem, linux-crypto, anton, ccross,
	keescook, tony.luck, phillip, Sven Schmidt
In-Reply-To: <1485011351-7463-1-git-send-email-4sschmid@informatik.uni-hamburg.de>

This patch updates the crypto modules using LZ4 compression
to work with the new LZ4 module version.

Signed-off-by: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
---
 crypto/lz4.c   | 21 ++++++++-------------
 crypto/lz4hc.c | 21 ++++++++-------------
 2 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/crypto/lz4.c b/crypto/lz4.c
index 99c1b2c..40fd2c2 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -66,15 +66,13 @@ static void lz4_exit(struct crypto_tfm *tfm)
 static int __lz4_compress_crypto(const u8 *src, unsigned int slen,
 				 u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_default(src, dst,
+		slen, (int)((size_t)dlen), ctx);
 
-	err = lz4_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (!out_len)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -96,16 +94,13 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4_decompress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	if (out_len < 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 75ffc4a..6f16f96 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -65,15 +65,13 @@ static void lz4hc_exit(struct crypto_tfm *tfm)
 static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen,
 				   u8 *dst, unsigned int *dlen, void *ctx)
 {
-	size_t tmp_len = *dlen;
-	int err;
+	int out_len = LZ4_compress_HC(src, dst, slen,
+		(int)((size_t)dlen), LZ4HC_DEFAULT_CLEVEL, ctx);
 
-	err = lz4hc_compress(src, slen, dst, &tmp_len, ctx);
-
-	if (err < 0)
+	if (out_len == 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
+	*dlen = out_len;
 	return 0;
 }
 
@@ -97,16 +95,13 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
 static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen,
 				     u8 *dst, unsigned int *dlen, void *ctx)
 {
-	int err;
-	size_t tmp_len = *dlen;
-	size_t __slen = slen;
+	int out_len = LZ4_decompress_safe(src, dst, slen, (int)((size_t)dlen));
 
-	err = lz4_decompress_unknownoutputsize(src, __slen, dst, &tmp_len);
-	if (err < 0)
+	if (out_len < 0)
 		return -EINVAL;
 
-	*dlen = tmp_len;
-	return err;
+	*dlen = out_len;
+	return out_len;
 }
 
 static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src,
-- 
2.1.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox