All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Eric Bénard" <eric@eukrea.com>
To: linux-kernel@vger.kernel.org
Cc: nicolas.ferre@atmel.com, linux-arm-kernel@lists.infradead.org,
	linux-crypto@vger.kernel.org, herbert@gondor.apana.org.au,
	davem@davemloft.net, plagnioj@jcrosoft.com, nicolas@eukrea.com,
	eric@eukrea.com
Subject: [PATCH 4/5] crypto: add Atmel SHA1/SHA256 driver
Date: Sun,  1 Jul 2012 19:19:46 +0200	[thread overview]
Message-ID: <1341163187-14946-5-git-send-email-eric@eukrea.com> (raw)
In-Reply-To: <1341163187-14946-1-git-send-email-eric@eukrea.com>

From: Nicolas Royer <nicolas@eukrea.com>

Signed-off-by: Nicolas Royer <nicolas@eukrea.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Eric Bénard <eric@eukrea.com>
Tested-by: Eric Bénard <eric@eukrea.com>
---
 drivers/crypto/Kconfig          |   14 +
 drivers/crypto/Makefile         |    1 +
 drivers/crypto/atmel-sha-regs.h |   46 ++
 drivers/crypto/atmel-sha.c      | 1112 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 1173 insertions(+), 0 deletions(-)
 create mode 100644 drivers/crypto/atmel-sha-regs.h
 create mode 100644 drivers/crypto/atmel-sha.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 9ac7128..631014b 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -358,4 +358,18 @@ config CRYPTO_DEV_ATMEL_TDES
 	  To compile this driver as a module, choose M here: the module
 	  will be called atmel-tdes.
 
+config CRYPTO_DEV_ATMEL_SHA
+	tristate "Support for Atmel SHA1/SHA256 hw accelerator"
+	depends on ARCH_AT91
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_ALGAPI
+	help
+	  Some Atmel processors have SHA1/SHA256 hw accelerator.
+	  Select this if you want to use the Atmel module for
+	  SHA1/SHA256 algorithms.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called atmel-sha.
+
 endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 211fdc2..387bee1 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
 obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
+obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h
new file mode 100644
index 0000000..dc53a20
--- /dev/null
+++ b/drivers/crypto/atmel-sha-regs.h
@@ -0,0 +1,46 @@
+#ifndef __ATMEL_SHA_REGS_H__
+#define __ATMEL_SHA_REGS_H__
+
+#define SHA_REG_DIGEST(x)		(0x80 + ((x) * 0x04))
+#define SHA_REG_DIN(x)			(0x40 + ((x) * 0x04))
+
+#define SHA_CR				0x00
+#define SHA_CR_START			(1 << 0)
+#define SHA_CR_FIRST			(1 << 4)
+#define SHA_CR_SWRST			(1 << 8)
+
+#define SHA_MR				0x04
+#define SHA_MR_MODE_MASK		(0x3 << 0)
+#define SHA_MR_MODE_MANUAL		0x0
+#define SHA_MR_MODE_AUTO		0x1
+#define SHA_MR_MODE_PDC			0x2
+#define	SHA_MR_DUALBUFF			(1 << 3)
+#define SHA_MR_PROCDLY			(1 << 4)
+#define SHA_MR_ALGO_SHA1		(0 << 8)
+#define SHA_MR_ALGO_SHA256		(1 << 8)
+
+#define SHA_IER				0x10
+#define SHA_IDR				0x14
+#define SHA_IMR				0x18
+#define SHA_ISR				0x1C
+#define SHA_INT_DATARDY			(1 << 0)
+#define SHA_INT_ENDTX			(1 << 1)
+#define SHA_INT_TXBUFE			(1 << 2)
+#define SHA_INT_URAD			(1 << 8)
+#define SHA_ISR_URAT_MASK		(0x7 << 12)
+#define SHA_ISR_URAT_IDR		(0x0 << 12)
+#define SHA_ISR_URAT_ODR		(0x1 << 12)
+#define SHA_ISR_URAT_MR			(0x2 << 12)
+#define SHA_ISR_URAT_WO			(0x5 << 12)
+
+#define SHA_TPR				0x108
+#define SHA_TCR				0x10C
+#define SHA_TNPR			0x118
+#define SHA_TNCR			0x11C
+#define SHA_PTCR			0x120
+#define SHA_PTCR_TXTEN		(1 << 8)
+#define SHA_PTCR_TXTDIS		(1 << 9)
+#define SHA_PTSR			0x124
+#define SHA_PTSR_TXTEN		(1 << 8)
+
+#endif /* __ATMEL_SHA_REGS_H__ */
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
new file mode 100644
index 0000000..f938b9d
--- /dev/null
+++ b/drivers/crypto/atmel-sha.c
@@ -0,0 +1,1112 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for ATMEL SHA1/SHA256 HW acceleration.
+ *
+ * Copyright (c) 2012 Eukréa Electromatique - ATMEL
+ * Author: Nicolas Royer <nicolas@eukrea.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Some ideas are from omap-sham.c drivers.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/sha.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include "atmel-sha-regs.h"
+
+/* SHA flags */
+#define SHA_FLAGS_BUSY			BIT(0)
+#define	SHA_FLAGS_FINAL			BIT(1)
+#define SHA_FLAGS_DMA_ACTIVE	BIT(2)
+#define SHA_FLAGS_OUTPUT_READY	BIT(3)
+#define SHA_FLAGS_INIT			BIT(4)
+#define SHA_FLAGS_CPU			BIT(5)
+#define SHA_FLAGS_DMA_READY		BIT(6)
+
+#define SHA_FLAGS_FINUP		BIT(16)
+#define SHA_FLAGS_SG		BIT(17)
+#define SHA_FLAGS_SHA1		BIT(18)
+#define SHA_FLAGS_SHA256	BIT(19)
+#define SHA_FLAGS_ERROR		BIT(20)
+#define SHA_FLAGS_PAD		BIT(21)
+
+#define SHA_FLAGS_DUALBUFF	BIT(24)
+
+#define SHA_OP_UPDATE	1
+#define SHA_OP_FINAL	2
+
+#define SHA_BUFFER_LEN		PAGE_SIZE
+
+#define ATMEL_SHA_DMA_THRESHOLD		56
+
+
+struct atmel_sha_dev;
+
+struct atmel_sha_reqctx {
+	struct atmel_sha_dev	*dd;
+	unsigned long	flags;
+	unsigned long	op;
+
+	u8	digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
+	size_t	digcnt;
+	size_t	bufcnt;
+	size_t	buflen;
+	dma_addr_t	dma_addr;
+
+	/* walk state */
+	struct scatterlist	*sg;
+	unsigned int	offset;	/* offset in current sg */
+	unsigned int	total;	/* total request */
+
+	u8	buffer[0] __aligned(sizeof(u32));
+};
+
+struct atmel_sha_ctx {
+	struct atmel_sha_dev	*dd;
+
+	unsigned long		flags;
+
+	/* fallback stuff */
+	struct crypto_shash	*fallback;
+
+};
+
+#define ATMEL_SHA_QUEUE_LENGTH	1
+
+struct atmel_sha_dev {
+	struct list_head	list;
+	unsigned long		phys_base;
+	struct device		*dev;
+	struct clk			*iclk;
+	int					irq;
+	void __iomem		*io_base;
+
+	spinlock_t		lock;
+	int			err;
+	struct tasklet_struct	done_task;
+
+	unsigned long		flags;
+	struct crypto_queue	queue;
+	struct ahash_request	*req;
+};
+
+struct atmel_sha_drv {
+	struct list_head	dev_list;
+	spinlock_t		lock;
+};
+
+static struct atmel_sha_drv atmel_sha = {
+	.dev_list = LIST_HEAD_INIT(atmel_sha.dev_list),
+	.lock = __SPIN_LOCK_UNLOCKED(atmel_sha.lock),
+};
+
+static inline u32 atmel_sha_read(struct atmel_sha_dev *dd, u32 offset)
+{
+	return readl_relaxed(dd->io_base + offset);
+}
+
+static inline void atmel_sha_write(struct atmel_sha_dev *dd,
+					u32 offset, u32 value)
+{
+	writel_relaxed(value, dd->io_base + offset);
+}
+
+static void atmel_sha_dualbuff_test(struct atmel_sha_dev *dd)
+{
+	atmel_sha_write(dd, SHA_MR, SHA_MR_DUALBUFF);
+
+	if (atmel_sha_read(dd, SHA_MR) & SHA_MR_DUALBUFF)
+		dd->flags |= SHA_FLAGS_DUALBUFF;
+}
+
+static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx)
+{
+	size_t count;
+
+	while ((ctx->bufcnt < ctx->buflen) && ctx->total) {
+		count = min(ctx->sg->length - ctx->offset, ctx->total);
+		count = min(count, ctx->buflen - ctx->bufcnt);
+
+		if (count <= 0)
+			break;
+
+		scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
+			ctx->offset, count, 0);
+
+		ctx->bufcnt += count;
+		ctx->offset += count;
+		ctx->total -= count;
+
+		if (ctx->offset == ctx->sg->length) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+			else
+				ctx->total = 0;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Append the SHA padding to ctx->buffer so that the padded message is a
+ * multiple of 512 bits: one 0x80 byte, "padlen - 1" zero bytes, then the
+ * message length in bits as a 64-bit big-endian value. @length is added
+ * to ctx->bufcnt + ctx->digcnt to form that message length (in bytes).
+ *
+ * padlen is calculated as follows, with index = ctx->bufcnt modulo 64:
+ *  - if index < 56 bytes then padlen = 56 - index
+ *  - else padlen = 64 + 56 - index
+ */
+static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length)
+{
+	unsigned int index, padlen;
+	u64 bits;
+	u64 size;
+
+	/* widen first: a 32-bit size_t sum would overflow when shifted */
+	bits = ((u64)ctx->bufcnt + ctx->digcnt + length) << 3;
+	size = cpu_to_be64(bits);
+	index = ctx->bufcnt & 0x3f;
+	padlen = (index < 56) ? (56 - index) : ((64+56) - index);
+	*(ctx->buffer + ctx->bufcnt) = 0x80;
+	memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1);
+	memcpy(ctx->buffer + ctx->bufcnt + padlen, &size, 8);
+	ctx->bufcnt += padlen + 8;
+	ctx->flags |= SHA_FLAGS_PAD;
+}
+
+/*
+ * Bind the request to a SHA device (-ENODEV if none) and reset its state.
+ */
+static int atmel_sha_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd;
+
+	spin_lock_bh(&atmel_sha.lock);
+	/* first request on this tfm: bind it to the first registered device */
+	if (!tctx->dd && !list_empty(&atmel_sha.dev_list))
+		tctx->dd = list_first_entry(&atmel_sha.dev_list,
+					    struct atmel_sha_dev, list);
+	dd = tctx->dd;
+	spin_unlock_bh(&atmel_sha.lock);
+
+	/* no device (left) in dev_list: dd->dev below would be a NULL deref */
+	if (!dd)
+		return -ENODEV;
+
+	ctx->dd = dd;
+	ctx->flags = 0;
+
+	dev_dbg(dd->dev, "init: digest size: %d\n",
+		crypto_ahash_digestsize(tfm));
+
+	if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE)
+		ctx->flags |= SHA_FLAGS_SHA1;
+	else if (crypto_ahash_digestsize(tfm) == SHA256_DIGEST_SIZE)
+		ctx->flags |= SHA_FLAGS_SHA256;
+
+	ctx->bufcnt = 0;
+	ctx->digcnt = 0;
+	ctx->buflen = SHA_BUFFER_LEN;
+
+	return 0;
+}
+
+/*
+ * Program IER, CR and MR for one transfer (PDC mode if @dma, else auto mode).
+ */
+static void atmel_sha_write_ctrl(struct atmel_sha_dev *dd, int dma)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	u32 valmr = SHA_MR_MODE_AUTO;
+
+	if (likely(dma)) {
+		atmel_sha_write(dd, SHA_IER, SHA_INT_TXBUFE);
+		valmr = SHA_MR_MODE_PDC;
+		/* DUALBUFF is a separate MR bit: OR it in, keep the PDC mode */
+		if (dd->flags & SHA_FLAGS_DUALBUFF)
+			valmr |= SHA_MR_DUALBUFF;
+	} else {
+		atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY);
+	}
+	if (ctx->flags & SHA_FLAGS_SHA256)
+		valmr |= SHA_MR_ALGO_SHA256;
+
+	/* Setting CR_FIRST only for the first iteration */
+	atmel_sha_write(dd, SHA_CR, ctx->digcnt ? 0 : SHA_CR_FIRST);
+	atmel_sha_write(dd, SHA_MR, valmr);
+}
+
+static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf,
+			      size_t length, int final)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int count, len32;
+	const u32 *buffer = (const u32 *)buf;
+
+	dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length, final);
+
+	atmel_sha_write_ctrl(dd, 0);
+
+	/* should be non-zero before next lines to disable clocks later */
+	ctx->digcnt += length;
+
+	if (final)
+		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
+
+	len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+	dd->flags |= SHA_FLAGS_CPU;
+
+	for (count = 0; count < len32; count++)
+		atmel_sha_write(dd, SHA_REG_DIN(count), buffer[count]);
+
+	return -EINPROGRESS;
+}
+
+static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
+		size_t length1, dma_addr_t dma_addr2, size_t length2, int final)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int len32;
+
+	dev_dbg(dd->dev, "xmit_pdc: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length1, final);
+
+	len32 = DIV_ROUND_UP(length1, sizeof(u32));
+	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTDIS);
+	atmel_sha_write(dd, SHA_TPR, dma_addr1);
+	atmel_sha_write(dd, SHA_TCR, len32);
+
+	len32 = DIV_ROUND_UP(length2, sizeof(u32));
+	atmel_sha_write(dd, SHA_TNPR, dma_addr2);
+	atmel_sha_write(dd, SHA_TNCR, len32);
+
+	atmel_sha_write_ctrl(dd, 1);
+
+	/* should be non-zero before next lines to disable clocks later */
+	ctx->digcnt += length1;
+
+	if (final)
+		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
+
+	dd->flags |=  SHA_FLAGS_DMA_ACTIVE;
+
+	/* Start DMA transfer */
+	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTEN);
+
+	return -EINPROGRESS;
+}
+
+static int atmel_sha_update_cpu(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int bufcnt;
+
+	atmel_sha_append_sg(ctx);
+	atmel_sha_fill_padding(ctx, 0);
+
+	bufcnt = ctx->bufcnt;
+	ctx->bufcnt = 0;
+
+	return atmel_sha_xmit_cpu(dd, ctx->buffer, bufcnt, 1);
+}
+
+static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd,
+					struct atmel_sha_reqctx *ctx,
+					size_t length, int final)
+{
+	ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
+				ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+		dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen +
+				SHA1_BLOCK_SIZE);
+		return -EINVAL;
+	}
+
+	ctx->flags &= ~SHA_FLAGS_SG;
+
+	/* next call does not fail... so no unmap in the case of error */
+	return atmel_sha_xmit_pdc(dd, ctx->dma_addr, length, 0, 0, final);
+}
+
+static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int final;
+	size_t count;
+
+	atmel_sha_append_sg(ctx);
+
+	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
+
+	dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n",
+					 ctx->bufcnt, ctx->digcnt, final);
+
+	if (final)
+		atmel_sha_fill_padding(ctx, 0);
+
+	if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		return atmel_sha_xmit_dma_map(dd, ctx, count, final);
+	}
+
+	return 0;
+}
+
+static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int length, final, tail;
+	struct scatterlist *sg;
+	unsigned int count;
+
+	if (!ctx->total)
+		return 0;
+
+	if (ctx->bufcnt || ctx->offset)
+		return atmel_sha_update_dma_slow(dd);
+
+	dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n",
+			ctx->digcnt, ctx->bufcnt, ctx->total);
+
+	sg = ctx->sg;
+
+	if (!IS_ALIGNED(sg->offset, sizeof(u32)))
+		return atmel_sha_update_dma_slow(dd);
+
+	if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, SHA1_BLOCK_SIZE))
+		/* size is not SHA1_BLOCK_SIZE aligned */
+		return atmel_sha_update_dma_slow(dd);
+
+	length = min(ctx->total, sg->length);
+
+	if (sg_is_last(sg)) {
+		if (!(ctx->flags & SHA_FLAGS_FINUP)) {
+			/* not last sg must be SHA1_BLOCK_SIZE aligned */
+			tail = length & (SHA1_BLOCK_SIZE - 1);
+			length -= tail;
+			if (length == 0) {
+				/* offset where to start slow */
+				ctx->offset = length;
+				return atmel_sha_update_dma_slow(dd);
+			}
+		}
+	}
+
+	ctx->total -= length;
+	ctx->offset = length; /* offset where to start slow */
+
+	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
+
+	/* Add padding */
+	if (final) {
+		tail = length & (SHA1_BLOCK_SIZE - 1);
+		length -= tail;
+		ctx->total += tail;
+		ctx->offset = length; /* offset where to start slow */
+
+		sg = ctx->sg;
+		atmel_sha_append_sg(ctx);
+
+		atmel_sha_fill_padding(ctx, length);
+
+		ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
+			ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+		if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+			dev_err(dd->dev, "dma %u bytes error\n",
+				ctx->buflen + SHA1_BLOCK_SIZE);
+			return -EINVAL;
+		}
+
+		if (length == 0) {
+			ctx->flags &= ~SHA_FLAGS_SG;
+			count = ctx->bufcnt;
+			ctx->bufcnt = 0;
+			return atmel_sha_xmit_pdc(dd, ctx->dma_addr, count, 0,
+					0, final);
+		} else {
+			ctx->sg = sg;
+			if (!dma_map_sg(dd->dev, ctx->sg, 1,
+				DMA_TO_DEVICE)) {
+					dev_err(dd->dev, "dma_map_sg  error\n");
+					return -EINVAL;
+			}
+
+			ctx->flags |= SHA_FLAGS_SG;
+
+			count = ctx->bufcnt;
+			ctx->bufcnt = 0;
+			return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg),
+					length, ctx->dma_addr, count, final);
+		}
+	}
+
+	if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
+		dev_err(dd->dev, "dma_map_sg  error\n");
+		return -EINVAL;
+	}
+
+	ctx->flags |= SHA_FLAGS_SG;
+
+	/* next call does not fail... so no unmap in the case of error */
+	return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg), length, 0,
+								0, final);
+}
+
+static int atmel_sha_update_dma_stop(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+
+	if (ctx->flags & SHA_FLAGS_SG) {
+		dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
+		if (ctx->sg->length == ctx->offset) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+		}
+		if (ctx->flags & SHA_FLAGS_PAD)
+			dma_unmap_single(dd->dev, ctx->dma_addr,
+				ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	} else {
+		dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen +
+						SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	}
+
+	return 0;
+}
+
+static int atmel_sha_update_req(struct atmel_sha_dev *dd)
+{
+	struct ahash_request *req = dd->req;
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err;
+
+	dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n",
+		 ctx->total, ctx->digcnt, (ctx->flags & SHA_FLAGS_FINUP) != 0);
+
+	if (ctx->flags & SHA_FLAGS_CPU)
+		err = atmel_sha_update_cpu(dd);
+	else
+		err = atmel_sha_update_dma_start(dd);
+
+	/* wait for dma completion before can take more data */
+	dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n",
+			err, ctx->digcnt);
+
+	return err;
+}
+
+static int atmel_sha_final_req(struct atmel_sha_dev *dd)
+{
+	struct ahash_request *req = dd->req;
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err = 0;
+	int count;
+
+	if (ctx->bufcnt >= ATMEL_SHA_DMA_THRESHOLD) {
+		atmel_sha_fill_padding(ctx, 0);
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		err = atmel_sha_xmit_dma_map(dd, ctx, count, 1);
+	}
+	/* faster to handle last block with cpu */
+	else {
+		atmel_sha_fill_padding(ctx, 0);
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		err = atmel_sha_xmit_cpu(dd, ctx->buffer, count, 1);
+	}
+
+	dev_dbg(dd->dev, "final_req: err: %d\n", err);
+
+	return err;
+}
+
+static void atmel_sha_copy_hash(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	u32 *hash = (u32 *)ctx->digest;
+	int i;
+
+	if (likely(ctx->flags & SHA_FLAGS_SHA1))
+		for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++)
+			hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
+	else
+		for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(u32); i++)
+			hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
+}
+
+static void atmel_sha_copy_ready_hash(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	if (!req->result)
+		return;
+
+	if (likely(ctx->flags & SHA_FLAGS_SHA1))
+		memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE);
+	else
+		memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE);
+}
+
+static int atmel_sha_finish(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = ctx->dd;
+	int err = 0;
+
+	if (ctx->digcnt)
+		atmel_sha_copy_ready_hash(req);
+
+	dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt,
+		ctx->bufcnt);
+
+	return err;
+}
+
+static void atmel_sha_finish_req(struct ahash_request *req, int err)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = ctx->dd;
+
+	if (!err) {
+		atmel_sha_copy_hash(req);
+		if (SHA_FLAGS_FINAL & dd->flags)
+			err = atmel_sha_finish(req);
+	} else {
+		ctx->flags |= SHA_FLAGS_ERROR;
+	}
+
+	/* atomic operation is not needed here */
+	dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU |
+			SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY);
+
+	clk_disable_unprepare(dd->iclk);
+
+	if (req->base.complete)
+		req->base.complete(&req->base, err);
+
+	/* handle new request */
+	tasklet_schedule(&dd->done_task);
+}
+
+/* Enable the clock; on first use also reset the IP and probe DUALBUFF. */
+static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
+{
+	clk_prepare_enable(dd->iclk);
+
+	if (!(SHA_FLAGS_INIT & dd->flags)) {
+		atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST);
+		atmel_sha_dualbuff_test(dd);
+		dd->flags |= SHA_FLAGS_INIT;
+		dd->err = 0;
+	}
+	return 0;
+}
+
+static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
+				  struct ahash_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct atmel_sha_reqctx *ctx;
+	unsigned long flags;
+	int err = 0, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = ahash_enqueue_request(&dd->queue, req);
+
+	if (SHA_FLAGS_BUSY & dd->flags) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = crypto_get_backlog(&dd->queue);
+	async_req = crypto_dequeue_request(&dd->queue);
+	if (async_req)
+		dd->flags |= SHA_FLAGS_BUSY;
+
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!async_req)
+		return ret;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	req = ahash_request_cast(async_req);
+	dd->req = req;
+	ctx = ahash_request_ctx(req);
+
+	dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
+						ctx->op, req->nbytes);
+
+	err = atmel_sha_hw_init(dd);
+
+	if (err)
+		goto err1;
+
+	if (ctx->op == SHA_OP_UPDATE) {
+		err = atmel_sha_update_req(dd);
+		if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) {
+			/* no final() after finup() */
+			err = atmel_sha_final_req(dd);
+		}
+	} else if (ctx->op == SHA_OP_FINAL) {
+		err = atmel_sha_final_req(dd);
+	}
+
+err1:
+	if (err != -EINPROGRESS)
+		/* done_task will not finish it, so do it here */
+		atmel_sha_finish_req(req, err);
+
+	dev_dbg(dd->dev, "exit, err: %d\n", err);
+
+	return ret;
+}
+
+static int atmel_sha_enqueue(struct ahash_request *req, unsigned int op)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+	struct atmel_sha_dev *dd = tctx->dd;
+
+	ctx->op = op;
+
+	return atmel_sha_handle_queue(dd, req);
+}
+
+static int atmel_sha_update(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	if (!req->nbytes)
+		return 0;
+
+	ctx->total = req->nbytes;
+	ctx->sg = req->src;
+	ctx->offset = 0;
+
+	if (ctx->flags & SHA_FLAGS_FINUP) {
+		if (ctx->bufcnt + ctx->total < ATMEL_SHA_DMA_THRESHOLD)
+			/* faster to use CPU for short transfers */
+			ctx->flags |= SHA_FLAGS_CPU;
+	} else if (ctx->bufcnt + ctx->total < ctx->buflen) {
+		atmel_sha_append_sg(ctx);
+		return 0;
+	}
+	return atmel_sha_enqueue(req, SHA_OP_UPDATE);
+}
+
+/*
+ * Finish the hash of @req.
+ * Used as the .final handler and called from atmel_sha_finup().
+ *
+ * - after a failed request (SHA_FLAGS_ERROR) nothing is computed and 0 is
+ *   returned;
+ * - when the padding has already been sent (SHA_FLAGS_PAD) and the buffer
+ *   is empty, atmel_sha_finish() copies the saved digest to the caller;
+ * - otherwise the final block is queued with SHA_OP_FINAL and the value
+ *   of atmel_sha_enqueue() is returned; completion is then reported by
+ *   atmel_sha_finish_req() through req->base.complete().
+ *
+ * The final block always goes through the queue, even when the buffer is
+ * empty: starting the hardware directly from here would bypass dd->lock
+ * and the SHA_FLAGS_BUSY arbitration of atmel_sha_handle_queue(), and
+ * atmel_sha_final_req() would run with dd->req not set to @req.
+ */
+static int atmel_sha_final(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	if (ctx->flags & SHA_FLAGS_ERROR)
+		return 0; /* uncompleted hash is not needed */
+
+	if (ctx->bufcnt || !(ctx->flags & SHA_FLAGS_PAD)) {
+		/* data or padding still has to be hashed */
+		return atmel_sha_enqueue(req, SHA_OP_FINAL);
+	}
+
+	/* copy ready hash (+ finalize hmac) */
+	return atmel_sha_finish(req);
+}
+
+static int atmel_sha_finup(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err1, err2;
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	err1 = atmel_sha_update(req);
+	if (err1 == -EINPROGRESS || err1 == -EBUSY)
+		return err1;
+
+	/*
+	 * final() must always be called to clean up resources, even if
+	 * update() failed, except when it returned EINPROGRESS or EBUSY
+	 */
+	err2 = atmel_sha_final(req);
+
+	return err1 ?: err2;
+}
+
+static int atmel_sha_digest(struct ahash_request *req)
+{
+	return atmel_sha_init(req) ?: atmel_sha_finup(req);
+}
+
+static int atmel_sha_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
+{
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(tfm);
+	const char *alg_name = crypto_tfm_alg_name(tfm);
+
+	/* Allocate a fallback and abort if it failed. */
+	tctx->fallback = crypto_alloc_shash(alg_name, 0,
+					    CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(tctx->fallback)) {
+		pr_err("atmel-sha: fallback driver '%s' could not be loaded.\n",
+				alg_name);
+		return PTR_ERR(tctx->fallback);
+	}
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct atmel_sha_reqctx) +
+				 SHA_BUFFER_LEN + SHA256_BLOCK_SIZE);
+
+	return 0;
+}
+
+static int atmel_sha_cra_init(struct crypto_tfm *tfm)
+{
+	return atmel_sha_cra_init_alg(tfm, NULL);
+}
+
+static void atmel_sha_cra_exit(struct crypto_tfm *tfm)
+{
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_shash(tctx->fallback);
+	tctx->fallback = NULL;
+}
+
+static struct ahash_alg sha_algs[] = {
+{
+	.init		= atmel_sha_init,
+	.update		= atmel_sha_update,
+	.final		= atmel_sha_final,
+	.finup		= atmel_sha_finup,
+	.digest		= atmel_sha_digest,
+	.halg = {
+		.digestsize	= SHA1_DIGEST_SIZE,
+		.base	= {
+			.cra_name		= "sha1",
+			.cra_driver_name	= "atmel-sha1",
+			.cra_priority		= 100,
+			.cra_flags		= CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize		= SHA1_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
+			.cra_alignmask		= 0,
+			.cra_module		= THIS_MODULE,
+			.cra_init		= atmel_sha_cra_init,
+			.cra_exit		= atmel_sha_cra_exit,
+		}
+	}
+},
+{
+	.init		= atmel_sha_init,
+	.update		= atmel_sha_update,
+	.final		= atmel_sha_final,
+	.finup		= atmel_sha_finup,
+	.digest		= atmel_sha_digest,
+	.halg = {
+		.digestsize	= SHA256_DIGEST_SIZE,
+		.base	= {
+			.cra_name		= "sha256",
+			.cra_driver_name	= "atmel-sha256",
+			.cra_priority		= 100,
+			.cra_flags		= CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize		= SHA256_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
+			.cra_alignmask		= 0,
+			.cra_module		= THIS_MODULE,
+			.cra_init		= atmel_sha_cra_init,
+			.cra_exit		= atmel_sha_cra_exit,
+		}
+	}
+},
+};
+
+static void atmel_sha_done_task(unsigned long data)
+{
+	struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data;
+	int err = 0;
+
+	if (!(SHA_FLAGS_BUSY & dd->flags)) {
+		atmel_sha_handle_queue(dd, NULL);
+		return;
+	}
+
+	if (SHA_FLAGS_CPU & dd->flags) {
+		if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
+			dd->flags &= ~SHA_FLAGS_OUTPUT_READY;
+			goto finish;
+		}
+	} else if (SHA_FLAGS_DMA_READY & dd->flags) {
+		if (SHA_FLAGS_DMA_ACTIVE & dd->flags) {
+			dd->flags &= ~SHA_FLAGS_DMA_ACTIVE;
+			atmel_sha_update_dma_stop(dd);
+			if (dd->err) {
+				err = dd->err;
+				goto finish;
+			}
+		}
+		if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
+			/* final or intermediate hash ready: start next chunk */
+			dd->flags &= ~(SHA_FLAGS_DMA_READY |
+						SHA_FLAGS_OUTPUT_READY);
+			err = atmel_sha_update_dma_start(dd);
+			if (err != -EINPROGRESS)
+				goto finish;
+		}
+	}
+	return;
+
+finish:
+	/* finish the current request */
+	atmel_sha_finish_req(dd->req, err);
+}
+
+static irqreturn_t atmel_sha_irq(int irq, void *dev_id)
+{
+	struct atmel_sha_dev *sha_dd = dev_id;
+	u32 reg;
+
+	reg = atmel_sha_read(sha_dd, SHA_ISR);
+	if (reg & atmel_sha_read(sha_dd, SHA_IMR)) {
+		atmel_sha_write(sha_dd, SHA_IDR, reg);
+		if (SHA_FLAGS_BUSY & sha_dd->flags) {
+			sha_dd->flags |= SHA_FLAGS_OUTPUT_READY;
+			if (!(SHA_FLAGS_CPU & sha_dd->flags))
+				sha_dd->flags |= SHA_FLAGS_DMA_READY;
+			tasklet_schedule(&sha_dd->done_task);
+		} else {
+			dev_warn(sha_dd->dev, "SHA interrupt when no active requests.\n");
+		}
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++)
+		crypto_unregister_ahash(&sha_algs[i]);
+}
+
+static int atmel_sha_register_algs(struct atmel_sha_dev *dd)
+{
+	int err, i, j;
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++) {
+		err = crypto_register_ahash(&sha_algs[i]);
+		if (err)
+			goto err_sha_algs;
+	}
+
+	return 0;
+
+err_sha_algs:
+	for (j = 0; j < i; j++)
+		crypto_unregister_ahash(&sha_algs[j]);
+
+	return err;
+}
+
+/*
+ * Probe: map registers, get clock and irq, register sha1/sha256 algorithms.
+ */
+static int __devinit atmel_sha_probe(struct platform_device *pdev)
+{
+	struct atmel_sha_dev *sha_dd;
+	struct device *dev = &pdev->dev;
+	struct resource *sha_res;
+	unsigned long sha_phys_size;
+	int err;
+
+	sha_dd = kzalloc(sizeof(struct atmel_sha_dev), GFP_KERNEL);
+	if (sha_dd == NULL) {
+		dev_err(dev, "unable to alloc data struct.\n");
+		err = -ENOMEM;
+		goto sha_dd_err;
+	}
+
+	sha_dd->dev = dev;
+	sha_dd->irq = -1;
+	platform_set_drvdata(pdev, sha_dd);
+
+	INIT_LIST_HEAD(&sha_dd->list);
+	spin_lock_init(&sha_dd->lock);
+	tasklet_init(&sha_dd->done_task, atmel_sha_done_task,
+					(unsigned long)sha_dd);
+	crypto_init_queue(&sha_dd->queue, ATMEL_SHA_QUEUE_LENGTH);
+
+	/* Get the base address */
+	sha_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!sha_res) {
+		dev_err(dev, "no MEM resource info\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+	sha_dd->phys_base = sha_res->start;
+	sha_phys_size = resource_size(sha_res);
+
+	/* Get the IRQ */
+	sha_dd->irq = platform_get_irq(pdev,  0);
+	if (sha_dd->irq < 0) {
+		dev_err(dev, "no IRQ resource info\n");
+		err = sha_dd->irq;
+		goto res_err;
+	}
+
+	/* Initializing the clock */
+	sha_dd->iclk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(sha_dd->iclk)) {
+		dev_err(dev, "clock intialization failed.\n");
+		err = PTR_ERR(sha_dd->iclk);
+		goto res_err;
+	}
+
+	sha_dd->io_base = ioremap(sha_dd->phys_base, sha_phys_size);
+	if (!sha_dd->io_base) {
+		dev_err(dev, "can't ioremap\n");
+		err = -ENOMEM;
+		goto sha_io_err;
+	}
+
+	/* shared line: the handler may run at once, so map registers first */
+	err = request_irq(sha_dd->irq, atmel_sha_irq, IRQF_SHARED, "atmel-sha",
+						sha_dd);
+	if (err) {
+		dev_err(dev, "unable to request sha irq.\n");
+		goto irq_err;
+	}
+
+	spin_lock(&atmel_sha.lock);
+	list_add_tail(&sha_dd->list, &atmel_sha.dev_list);
+	spin_unlock(&atmel_sha.lock);
+
+	err = atmel_sha_register_algs(sha_dd);
+	if (err)
+		goto err_algs;
+
+	dev_info(dev, "Atmel SHA1/SHA256\n");
+
+	return 0;
+
+err_algs:
+	spin_lock(&atmel_sha.lock);
+	list_del(&sha_dd->list);
+	spin_unlock(&atmel_sha.lock);
+	free_irq(sha_dd->irq, sha_dd);
+irq_err:
+	iounmap(sha_dd->io_base);
+sha_io_err:
+	clk_put(sha_dd->iclk);
+res_err:
+	tasklet_kill(&sha_dd->done_task);
+	kfree(sha_dd);
+sha_dd_err:
+	dev_err(dev, "initialization failed.\n");
+
+	return err;
+}
+
+/* Unbind the device: unregister the algorithms, release its resources. */
+static int __devexit atmel_sha_remove(struct platform_device *pdev)
+{
+	struct atmel_sha_dev *sha_dd = platform_get_drvdata(pdev);
+
+	if (!sha_dd)
+		return -ENODEV;
+
+	spin_lock(&atmel_sha.lock);
+	list_del(&sha_dd->list);
+	spin_unlock(&atmel_sha.lock);
+
+	atmel_sha_unregister_algs(sha_dd);
+
+	/*
+	 * Release the irq first: the handler reads the registers and schedules
+	 * done_task, so neither may go away while it can still run.
+	 */
+	if (sha_dd->irq >= 0)
+		free_irq(sha_dd->irq, sha_dd);
+	tasklet_kill(&sha_dd->done_task);
+	iounmap(sha_dd->io_base);
+	clk_put(sha_dd->iclk);
+
+	kfree(sha_dd);
+	return 0;
+}
+
+static struct platform_driver atmel_sha_driver = {
+	.probe		= atmel_sha_probe,
+	.remove		= __devexit_p(atmel_sha_remove),
+	.driver		= {
+		.name	= "atmel_sha",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_platform_driver(atmel_sha_driver);
+
+MODULE_DESCRIPTION("Atmel SHA1/SHA256 hw acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique");
-- 
1.7.7.6

WARNING: multiple messages have this Message-ID (diff)
From: eric@eukrea.com (Eric Bénard)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH 4/5] crypto: add Atmel SHA1/SHA256 driver
Date: Sun,  1 Jul 2012 19:19:46 +0200	[thread overview]
Message-ID: <1341163187-14946-5-git-send-email-eric@eukrea.com> (raw)
In-Reply-To: <1341163187-14946-1-git-send-email-eric@eukrea.com>

From: Nicolas Royer <nicolas@eukrea.com>

Signed-off-by: Nicolas Royer <nicolas@eukrea.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Eric Bénard <eric@eukrea.com>
Tested-by: Eric Bénard <eric@eukrea.com>
---
 drivers/crypto/Kconfig          |   14 +
 drivers/crypto/Makefile         |    1 +
 drivers/crypto/atmel-sha-regs.h |   46 ++
 drivers/crypto/atmel-sha.c      | 1112 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 1173 insertions(+), 0 deletions(-)
 create mode 100644 drivers/crypto/atmel-sha-regs.h
 create mode 100644 drivers/crypto/atmel-sha.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 9ac7128..631014b 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -358,4 +358,18 @@ config CRYPTO_DEV_ATMEL_TDES
 	  To compile this driver as a module, choose M here: the module
 	  will be called atmel-tdes.
 
+config CRYPTO_DEV_ATMEL_SHA
+	tristate "Support for Atmel SHA1/SHA256 hw accelerator"
+	depends on ARCH_AT91
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_ALGAPI
+	help
+	  Some Atmel processors have SHA1/SHA256 hw accelerator.
+	  Select this if you want to use the Atmel module for
+	  SHA1/SHA256 algorithms.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called atmel-sha.
+
 endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 211fdc2..387bee1 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
 obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
+obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h
new file mode 100644
index 0000000..dc53a20
--- /dev/null
+++ b/drivers/crypto/atmel-sha-regs.h
@@ -0,0 +1,46 @@
+/*
+ * Register offsets and bit definitions for the Atmel SHA block, as used by
+ * atmel-sha.c. Offsets are in bytes and are added to the ioremap()ed base
+ * by the driver's read/write helpers.
+ */
+#ifndef __ATMEL_SHA_REGS_H__
+#define __ATMEL_SHA_REGS_H__
+
+/* Word-indexed register banks: one 32-bit register every 4 bytes */
+#define SHA_REG_DIGEST(x)		(0x80 + ((x) * 0x04))
+#define SHA_REG_DIN(x)			(0x40 + ((x) * 0x04))
+
+/* SHA_CR and the bits written to it */
+#define SHA_CR				0x00
+#define SHA_CR_START			(1 << 0)
+#define SHA_CR_FIRST			(1 << 4)
+#define SHA_CR_SWRST			(1 << 8)
+
+/* SHA_MR: start mode in bits 1:0, plus option and algorithm-select bits */
+#define SHA_MR				0x04
+#define SHA_MR_MODE_MASK		(0x3 << 0)
+#define SHA_MR_MODE_MANUAL		0x0
+#define SHA_MR_MODE_AUTO		0x1
+#define SHA_MR_MODE_PDC			0x2
+#define	SHA_MR_DUALBUFF			(1 << 3)
+#define SHA_MR_PROCDLY			(1 << 4)
+#define SHA_MR_ALGO_SHA1		(0 << 8)
+#define SHA_MR_ALGO_SHA256		(1 << 8)
+
+/*
+ * Interrupt registers. The driver writes IER to enable a source, reads
+ * ISR and IMR in its handler and writes IDR to mask what it has seen.
+ * NOTE(review): the URAD/URAT fields look like access-error reporting;
+ * confirm against the datasheet.
+ */
+#define SHA_IER				0x10
+#define SHA_IDR				0x14
+#define SHA_IMR				0x18
+#define SHA_ISR				0x1C
+#define SHA_INT_DATARDY			(1 << 0)
+#define SHA_INT_ENDTX			(1 << 1)
+#define SHA_INT_TXBUFE			(1 << 2)
+#define SHA_INT_URAD			(1 << 8)
+#define SHA_ISR_URAT_MASK		(0x7 << 12)
+#define SHA_ISR_URAT_IDR		(0x0 << 12)
+#define SHA_ISR_URAT_ODR		(0x1 << 12)
+#define SHA_ISR_URAT_MR			(0x2 << 12)
+#define SHA_ISR_URAT_WO			(0x5 << 12)
+
+/*
+ * PDC (DMA) transmit registers. The driver programs TPR/TCR with the
+ * address and 32-bit word count of the first buffer, TNPR/TNCR with those
+ * of the second buffer, and starts/stops the transfer through PTCR.
+ */
+#define SHA_TPR				0x108
+#define SHA_TCR				0x10C
+#define SHA_TNPR			0x118
+#define SHA_TNCR			0x11C
+#define SHA_PTCR			0x120
+#define SHA_PTCR_TXTEN		(1 << 8)
+#define SHA_PTCR_TXTDIS		(1 << 9)
+#define SHA_PTSR			0x124
+#define SHA_PTSR_TXTEN		(1 << 8)
+
+#endif /* __ATMEL_SHA_REGS_H__ */
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
new file mode 100644
index 0000000..f938b9d
--- /dev/null
+++ b/drivers/crypto/atmel-sha.c
@@ -0,0 +1,1112 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for ATMEL SHA1/SHA256 HW acceleration.
+ *
+ * Copyright (c) 2012 Eukréa Electromatique - ATMEL
+ * Author: Nicolas Royer <nicolas@eukrea.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Some ideas are from omap-sham.c drivers.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/sha.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include "atmel-sha-regs.h"
+
+/*
+ * SHA flags
+ *
+ * Bits 0-6 are kept in atmel_sha_dev.flags and track what the hardware and
+ * the tasklet are currently doing (SHA_FLAGS_CPU is also set in the request
+ * flags to select the CPU path for short transfers).
+ */
+#define SHA_FLAGS_BUSY			BIT(0)
+#define	SHA_FLAGS_FINAL			BIT(1)
+#define SHA_FLAGS_DMA_ACTIVE	BIT(2)
+#define SHA_FLAGS_OUTPUT_READY	BIT(3)
+#define SHA_FLAGS_INIT			BIT(4)
+#define SHA_FLAGS_CPU			BIT(5)
+#define SHA_FLAGS_DMA_READY		BIT(6)
+
+/* Per-request state kept in atmel_sha_reqctx.flags */
+#define SHA_FLAGS_FINUP		BIT(16)
+#define SHA_FLAGS_SG		BIT(17)
+#define SHA_FLAGS_SHA1		BIT(18)
+#define SHA_FLAGS_SHA256	BIT(19)
+#define SHA_FLAGS_ERROR		BIT(20)
+#define SHA_FLAGS_PAD		BIT(21)
+
+/* Device flag set by atmel_sha_dualbuff_test() when SHA_MR_DUALBUFF sticks */
+#define SHA_FLAGS_DUALBUFF	BIT(24)
+
+/* Values stored in atmel_sha_reqctx.op to tell the queue handler what to run */
+#define SHA_OP_UPDATE	1
+#define SHA_OP_FINAL	2
+
+/* Capacity used for the per-request bounce buffer (ctx->buflen) */
+#define SHA_BUFFER_LEN		PAGE_SIZE
+
+/* Transfers shorter than this many bytes are fed by the CPU instead of DMA */
+#define ATMEL_SHA_DMA_THRESHOLD		56
+
+
+struct atmel_sha_dev;
+
+/*
+ * Per-request state, stored in the ahash request context.
+ *
+ * The structure is followed by the bounce buffer; the space for it is
+ * reserved through crypto_ahash_set_reqsize() in atmel_sha_cra_init_alg().
+ */
+struct atmel_sha_reqctx {
+	struct atmel_sha_dev	*dd;	/* device picked in atmel_sha_init() */
+	unsigned long	flags;		/* SHA_FLAGS_* request bits */
+	unsigned long	op;		/* SHA_OP_UPDATE or SHA_OP_FINAL */
+
+	/* digest words read back from the hardware by atmel_sha_copy_hash() */
+	u8	digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
+	size_t	digcnt;		/* bytes handed to the hardware so far */
+	size_t	bufcnt;		/* bytes currently held in buffer[] */
+	size_t	buflen;		/* fill limit for buffer[], SHA_BUFFER_LEN */
+	dma_addr_t	dma_addr;	/* DMA mapping of buffer[] */
+
+	/* walk state */
+	struct scatterlist	*sg;
+	unsigned int	offset;	/* offset in current sg */
+	unsigned int	total;	/* total request */
+
+	/* bounce buffer: buffered input plus padding */
+	u8	buffer[0] __aligned(sizeof(u32));
+};
+
+/* Per-transform (tfm) context. */
+struct atmel_sha_ctx {
+	/* device cached by the first atmel_sha_init() call on this tfm */
+	struct atmel_sha_dev	*dd;
+
+	/* NOTE(review): not referenced by the code in this file */
+	unsigned long		flags;
+
+	/* fallback stuff */
+	/* allocated in atmel_sha_cra_init_alg(), freed in atmel_sha_cra_exit() */
+	struct crypto_shash	*fallback;
+
+};
+
+/* Maximum queue length passed to crypto_init_queue() in probe */
+#define ATMEL_SHA_QUEUE_LENGTH	1
+
+/* State of one SHA hardware instance. */
+struct atmel_sha_dev {
+	struct list_head	list;		/* link in atmel_sha.dev_list */
+	unsigned long		phys_base;	/* start of the MEM resource */
+	struct device		*dev;
+	struct clk			*iclk;	/* enabled while a request runs */
+	int					irq;	/* -1 until obtained in probe */
+	void __iomem		*io_base;	/* ioremap() of the MEM resource */
+
+	/* taken in atmel_sha_handle_queue() around queue and BUSY handling */
+	spinlock_t		lock;
+	int			err;		/* checked by done_task after DMA */
+	struct tasklet_struct	done_task;	/* atmel_sha_done_task() */
+
+	unsigned long		flags;		/* SHA_FLAGS_* device bits */
+	struct crypto_queue	queue;		/* pending ahash requests */
+	struct ahash_request	*req;		/* request being processed */
+};
+
+/* Driver-wide registry of probed devices. */
+struct atmel_sha_drv {
+	struct list_head	dev_list;	/* atmel_sha_dev.list entries */
+	spinlock_t		lock;		/* protects dev_list */
+};
+
+/* Single instance, initialised statically so it is usable before any probe */
+static struct atmel_sha_drv atmel_sha = {
+	.dev_list = LIST_HEAD_INIT(atmel_sha.dev_list),
+	.lock = __SPIN_LOCK_UNLOCKED(atmel_sha.lock),
+};
+
+/* Read the 32-bit register located @offset bytes into the mapped window. */
+static inline u32 atmel_sha_read(struct atmel_sha_dev *dd, u32 offset)
+{
+	void __iomem *reg = dd->io_base + offset;
+
+	return readl_relaxed(reg);
+}
+
+/* Write @value to the 32-bit register located @offset bytes into the window. */
+static inline void atmel_sha_write(struct atmel_sha_dev *dd,
+					u32 offset, u32 value)
+{
+	void __iomem *reg = dd->io_base + offset;
+
+	writel_relaxed(value, reg);
+}
+
+/*
+ * Probe for the dual-buffer feature: write SHA_MR_DUALBUFF to SHA_MR and
+ * record SHA_FLAGS_DUALBUFF in the device flags if the bit reads back set.
+ */
+static void atmel_sha_dualbuff_test(struct atmel_sha_dev *dd)
+{
+	u32 mr;
+
+	atmel_sha_write(dd, SHA_MR, SHA_MR_DUALBUFF);
+	mr = atmel_sha_read(dd, SHA_MR);
+
+	if (!(mr & SHA_MR_DUALBUFF))
+		return;
+
+	dd->flags |= SHA_FLAGS_DUALBUFF;
+}
+
+/*
+ * atmel_sha_append_sg() - copy request data from the scatterlist into the
+ * bounce buffer.
+ *
+ * Copies from ctx->sg (starting at ctx->offset) into ctx->buffer until the
+ * buffer holds ctx->buflen bytes or ctx->total reaches zero, advancing the
+ * walk state (sg, offset, total) and ctx->bufcnt as it goes.
+ *
+ * Always returns 0.
+ */
+static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx)
+{
+	size_t count;
+
+	while ((ctx->bufcnt < ctx->buflen) && ctx->total) {
+		count = min(ctx->sg->length - ctx->offset, ctx->total);
+		count = min(count, ctx->buflen - ctx->bufcnt);
+
+		if (!count) {
+			/*
+			 * Buffer space and pending data are both non-zero
+			 * here, so nothing is left in the current entry.
+			 * An empty entry is not necessarily the end of the
+			 * list: step over it instead of dropping the rest
+			 * of the request.
+			 */
+			if (!ctx->sg->length && !sg_is_last(ctx->sg)) {
+				ctx->sg = sg_next(ctx->sg);
+				continue;
+			}
+			break;
+		}
+
+		scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
+			ctx->offset, count, 0);
+
+		ctx->bufcnt += count;
+		ctx->offset += count;
+		ctx->total -= count;
+
+		if (ctx->offset == ctx->sg->length) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+			else
+				/* list exhausted: nothing more can be read */
+				ctx->total = 0;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * The purpose of this padding is to ensure that the padded message
+ * is a multiple of 512 bits. The bit "1" is appended at the end of
+ * the message followed by "padlen-1" zero bits. Then a 64-bit block
+ * equal to the message length in bits is appended.
+ *
+ * padlen is calculated as follows (index = ctx->bufcnt modulo 64):
+ *  - if index < 56 bytes then padlen = 56 - index
+ *  - else padlen = 64 + 56 - index
+ *
+ * @ctx:    request context whose buffer receives the padding
+ * @length: message bytes sent to the hardware without passing through
+ *          ctx->buffer; only counted in the encoded message length
+ *
+ * On return ctx->bufcnt includes the padding and SHA_FLAGS_PAD is set in
+ * ctx->flags.
+ */
+static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length)
+{
+	unsigned int index, padlen;
+	u64 bits;
+	__be64 size;
+
+	/*
+	 * Widen before shifting: the counters are size_t, so on a 32-bit
+	 * kernel "<< 3" would otherwise drop the top bits of the length.
+	 */
+	bits = ((u64)ctx->bufcnt + ctx->digcnt + length) << 3;
+	size = cpu_to_be64(bits);
+
+	index = ctx->bufcnt & 0x3f;
+	padlen = (index < 56) ? (56 - index) : ((64+56) - index);
+	*(ctx->buffer + ctx->bufcnt) = 0x80;
+	memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1);
+	memcpy(ctx->buffer + ctx->bufcnt + padlen, &size, sizeof(size));
+	ctx->bufcnt += padlen + 8;
+	ctx->flags |= SHA_FLAGS_PAD;
+}
+
+/*
+ * atmel_sha_init() - ahash .init callback.
+ *
+ * Binds the transform to a device (the first one on the global list, cached
+ * in the tfm context for later calls) and resets the request context: flags
+ * are cleared, the algorithm flag is derived from the digest size and the
+ * byte counters are zeroed.
+ *
+ * Returns 0 on success, -ENODEV when no device is registered.
+ */
+static int atmel_sha_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = NULL;
+	struct atmel_sha_dev *tmp;
+
+	spin_lock_bh(&atmel_sha.lock);
+	if (!tctx->dd) {
+		/* take the first device on the list, if there is one */
+		list_for_each_entry(tmp, &atmel_sha.dev_list, list) {
+			dd = tmp;
+			break;
+		}
+		tctx->dd = dd;
+	} else {
+		dd = tctx->dd;
+	}
+
+	spin_unlock_bh(&atmel_sha.lock);
+
+	/*
+	 * The list can be empty: remove() unlinks the device before the
+	 * algorithms are unregistered. Fail instead of dereferencing NULL.
+	 */
+	if (!dd)
+		return -ENODEV;
+
+	ctx->dd = dd;
+
+	ctx->flags = 0;
+
+	dev_dbg(dd->dev, "init: digest size: %d\n",
+		crypto_ahash_digestsize(tfm));
+
+	if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE)
+		ctx->flags |= SHA_FLAGS_SHA1;
+	else if (crypto_ahash_digestsize(tfm) == SHA256_DIGEST_SIZE)
+		ctx->flags |= SHA_FLAGS_SHA256;
+
+	ctx->bufcnt = 0;
+	ctx->digcnt = 0;
+	ctx->buflen = SHA_BUFFER_LEN;
+
+	return 0;
+}
+
+/*
+ * atmel_sha_write_ctrl() - program interrupt enable, SHA_CR and SHA_MR for
+ * the next transfer of the current request.
+ *
+ * @dd:  device; dd->req is the request being processed
+ * @dma: non-zero to set up a PDC transfer (TXBUFE interrupt), zero for a
+ *       CPU-fed transfer in auto mode (DATARDY interrupt)
+ */
+static void atmel_sha_write_ctrl(struct atmel_sha_dev *dd, int dma)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	u32 valcr = 0, valmr = SHA_MR_MODE_AUTO;
+
+	if (likely(dma)) {
+		atmel_sha_write(dd, SHA_IER, SHA_INT_TXBUFE);
+		valmr = SHA_MR_MODE_PDC;
+		/*
+		 * DUALBUFF is a separate bit outside SHA_MR_MODE_MASK; it
+		 * must be OR-ed in, otherwise the PDC mode is lost.
+		 */
+		if (dd->flags & SHA_FLAGS_DUALBUFF)
+			valmr |= SHA_MR_DUALBUFF;
+	} else {
+		atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY);
+	}
+
+	if (ctx->flags & SHA_FLAGS_SHA256)
+		valmr |= SHA_MR_ALGO_SHA256;
+
+	/* Setting CR_FIRST only for the first iteration */
+	if (!ctx->digcnt)
+		valcr = SHA_CR_FIRST;
+
+	atmel_sha_write(dd, SHA_CR, valcr);
+	atmel_sha_write(dd, SHA_MR, valmr);
+}
+
+/*
+ * atmel_sha_xmit_cpu() - feed @length bytes from @buf to the hardware by
+ * writing them word by word into the SHA_REG_DIN registers.
+ *
+ * @dd:     device; dd->req is the request being processed
+ * @buf:    data to hash; read as 32-bit words
+ * @length: number of bytes, rounded up to whole words for the copy
+ * @final:  non-zero when this is the last transfer of the request
+ *
+ * Always returns -EINPROGRESS: completion is reported through the DATARDY
+ * interrupt enabled by atmel_sha_write_ctrl().
+ */
+static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf,
+			      size_t length, int final)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int count, len32;
+	const u32 *buffer = (const u32 *)buf;
+
+	dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length, final);
+
+	/* must run before digcnt is updated: CR_FIRST depends on digcnt == 0 */
+	atmel_sha_write_ctrl(dd, 0);
+
+	/* should be non-zero before next lines to disable clocks later */
+	ctx->digcnt += length;
+
+	if (final)
+		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
+
+	len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+	/* tells the IRQ handler and done_task that no DMA is involved */
+	dd->flags |= SHA_FLAGS_CPU;
+
+	for (count = 0; count < len32; count++)
+		atmel_sha_write(dd, SHA_REG_DIN(count), buffer[count]);
+
+	return -EINPROGRESS;
+}
+
+/*
+ * atmel_sha_xmit_pdc() - start a PDC (DMA) transfer of up to two buffers.
+ *
+ * @dd:        device; dd->req is the request being processed
+ * @dma_addr1: DMA address of the first buffer
+ * @length1:   its length in bytes (programmed as 32-bit words, rounded up)
+ * @dma_addr2: DMA address of the second buffer, 0 if unused
+ * @length2:   its length in bytes, 0 if unused
+ * @final:     non-zero when this is the last transfer of the request
+ *
+ * Always returns -EINPROGRESS: completion is reported through the TXBUFE
+ * interrupt enabled by atmel_sha_write_ctrl().
+ */
+static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
+		size_t length1, dma_addr_t dma_addr2, size_t length2, int final)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int len32;
+
+	dev_dbg(dd->dev, "xmit_pdc: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length1, final);
+
+	/* transmitter is disabled while the pointers/counters are rewritten */
+	len32 = DIV_ROUND_UP(length1, sizeof(u32));
+	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTDIS);
+	atmel_sha_write(dd, SHA_TPR, dma_addr1);
+	atmel_sha_write(dd, SHA_TCR, len32);
+
+	len32 = DIV_ROUND_UP(length2, sizeof(u32));
+	atmel_sha_write(dd, SHA_TNPR, dma_addr2);
+	atmel_sha_write(dd, SHA_TNCR, len32);
+
+	/* must run before digcnt is updated: CR_FIRST depends on digcnt == 0 */
+	atmel_sha_write_ctrl(dd, 1);
+
+	/* should be non-zero before next lines to disable clocks later */
+	/* NOTE(review): only length1 is accounted here, length2 is not */
+	ctx->digcnt += length1;
+
+	if (final)
+		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
+
+	dd->flags |=  SHA_FLAGS_DMA_ACTIVE;
+
+	/* Start DMA transfer */
+	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTEN);
+
+	return -EINPROGRESS;
+}
+
+/*
+ * Short-transfer path: gather the request data into the bounce buffer, pad
+ * it and push the result to the hardware with the CPU as the final block.
+ */
+static int atmel_sha_update_cpu(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *rctx = ahash_request_ctx(dd->req);
+	int nbytes;
+
+	atmel_sha_append_sg(rctx);
+	atmel_sha_fill_padding(rctx, 0);
+
+	/* the buffer is handed over in full; mark it empty again */
+	nbytes = rctx->bufcnt;
+	rctx->bufcnt = 0;
+
+	return atmel_sha_xmit_cpu(dd, rctx->buffer, nbytes, 1);
+}
+
+/*
+ * Map the bounce buffer for DMA and start a single-buffer PDC transfer of
+ * @length bytes from it. Returns -EINVAL if the mapping fails, otherwise
+ * the result of atmel_sha_xmit_pdc().
+ */
+static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd,
+					struct atmel_sha_reqctx *ctx,
+					size_t length, int final)
+{
+	const size_t map_len = ctx->buflen + SHA1_BLOCK_SIZE;
+
+	ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, map_len,
+				DMA_TO_DEVICE);
+
+	if (!dma_mapping_error(dd->dev, ctx->dma_addr)) {
+		/* data comes from the bounce buffer, not from an sg entry */
+		ctx->flags &= ~SHA_FLAGS_SG;
+
+		/* the transfer start cannot fail, so no unmap is needed here */
+		return atmel_sha_xmit_pdc(dd, ctx->dma_addr, length, 0, 0,
+					final);
+	}
+
+	dev_err(dd->dev, "dma %u bytes error\n", map_len);
+	return -EINVAL;
+}
+
+/*
+ * Slow DMA path: data is first copied into the bounce buffer and sent from
+ * there. A transfer is started when this is the final chunk of a finup
+ * request or when the buffer is full with more data pending; otherwise the
+ * data just stays buffered and 0 is returned.
+ */
+static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int final;
+	size_t count;
+
+	atmel_sha_append_sg(ctx);
+
+	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
+
+	dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n",
+					 ctx->bufcnt, ctx->digcnt, final);
+
+	if (final)
+		atmel_sha_fill_padding(ctx, 0);
+
+	/* nothing to send yet: not final and the buffer is not a full chunk */
+	if (!final && !(ctx->bufcnt == ctx->buflen && ctx->total))
+		return 0;
+
+	count = ctx->bufcnt;
+	ctx->bufcnt = 0;
+
+	return atmel_sha_xmit_dma_map(dd, ctx, count, final);
+}
+
+/*
+ * atmel_sha_update_dma_start() - start the next DMA transfer of the current
+ * request.
+ *
+ * Fast path: when nothing is buffered and the current scatterlist entry is
+ * suitably aligned, the entry is mapped and sent to the hardware directly.
+ * For the final chunk of a finup request the block-aligned part of the
+ * entry is sent from the scatterlist and the remaining tail plus padding
+ * from the bounce buffer. Everything else is delegated to
+ * atmel_sha_update_dma_slow().
+ *
+ * Returns 0 when there is nothing to send, -EINPROGRESS once a transfer has
+ * been started, -EINVAL when a DMA mapping fails.
+ */
+static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int length, final, tail;
+	struct scatterlist *sg;
+	unsigned int count;
+
+	if (!ctx->total)
+		return 0;
+
+	/* buffered data or a partially consumed entry: must go through copy */
+	if (ctx->bufcnt || ctx->offset)
+		return atmel_sha_update_dma_slow(dd);
+
+	dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n",
+			ctx->digcnt, ctx->bufcnt, ctx->total);
+
+	sg = ctx->sg;
+
+	if (!IS_ALIGNED(sg->offset, sizeof(u32)))
+		return atmel_sha_update_dma_slow(dd);
+
+	if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, SHA1_BLOCK_SIZE))
+		/* size is not SHA1_BLOCK_SIZE aligned */
+		return atmel_sha_update_dma_slow(dd);
+
+	length = min(ctx->total, sg->length);
+
+	if (sg_is_last(sg)) {
+		if (!(ctx->flags & SHA_FLAGS_FINUP)) {
+			/* not last sg must be SHA1_BLOCK_SIZE aligned */
+			tail = length & (SHA1_BLOCK_SIZE - 1);
+			length -= tail;
+			if (length == 0) {
+				/* offset where to start slow */
+				ctx->offset = length;
+				return atmel_sha_update_dma_slow(dd);
+			}
+		}
+	}
+
+	ctx->total -= length;
+	ctx->offset = length; /* offset where to start slow */
+
+	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
+
+	/* Add padding */
+	if (final) {
+		tail = length & (SHA1_BLOCK_SIZE - 1);
+		length -= tail;
+		ctx->total += tail;
+		ctx->offset = length; /* offset where to start slow */
+
+		/* append_sg() may advance ctx->sg; remember the entry to map */
+		sg = ctx->sg;
+		atmel_sha_append_sg(ctx);
+
+		atmel_sha_fill_padding(ctx, length);
+
+		ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
+			ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+		if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+			dev_err(dd->dev, "dma %u bytes error\n",
+				ctx->buflen + SHA1_BLOCK_SIZE);
+			return -EINVAL;
+		}
+
+		if (length == 0) {
+			/* everything fits in the bounce buffer */
+			ctx->flags &= ~SHA_FLAGS_SG;
+			count = ctx->bufcnt;
+			ctx->bufcnt = 0;
+			return atmel_sha_xmit_pdc(dd, ctx->dma_addr, count, 0,
+					0, final);
+		} else {
+			ctx->sg = sg;
+			if (!dma_map_sg(dd->dev, ctx->sg, 1,
+				DMA_TO_DEVICE)) {
+					dev_err(dd->dev, "dma_map_sg  error\n");
+					/*
+					 * No transfer is started, so nobody
+					 * else will release the bounce buffer
+					 * mapping made above.
+					 */
+					dma_unmap_single(dd->dev,
+						ctx->dma_addr,
+						ctx->buflen + SHA1_BLOCK_SIZE,
+						DMA_TO_DEVICE);
+					return -EINVAL;
+			}
+
+			ctx->flags |= SHA_FLAGS_SG;
+
+			/* sg entry first, then tail + padding from the buffer */
+			count = ctx->bufcnt;
+			ctx->bufcnt = 0;
+			return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg),
+					length, ctx->dma_addr, count, final);
+		}
+	}
+
+	if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
+		dev_err(dd->dev, "dma_map_sg  error\n");
+		return -EINVAL;
+	}
+
+	ctx->flags |= SHA_FLAGS_SG;
+
+	/* next call does not fail... so no unmap in the case of error */
+	return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg), length, 0,
+								0, final);
+}
+
+/*
+ * Release the DMA mappings of the transfer that just completed and, for a
+ * scatterlist transfer, step to the next entry once the current one has
+ * been fully consumed. Always returns 0.
+ */
+static int atmel_sha_update_dma_stop(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+
+	if (!(ctx->flags & SHA_FLAGS_SG)) {
+		/* bounce-buffer-only transfer */
+		dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen +
+						SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+		return 0;
+	}
+
+	dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
+
+	if (ctx->sg->length == ctx->offset) {
+		ctx->sg = sg_next(ctx->sg);
+		if (ctx->sg)
+			ctx->offset = 0;
+	}
+
+	/* the padded tail was sent from the bounce buffer as second buffer */
+	if (ctx->flags & SHA_FLAGS_PAD)
+		dma_unmap_single(dd->dev, ctx->dma_addr,
+			ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+
+	return 0;
+}
+
+/*
+ * Process an update request: use the CPU path when the request was flagged
+ * SHA_FLAGS_CPU, the DMA path otherwise. Returns what the chosen path
+ * returned.
+ */
+static int atmel_sha_update_req(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int err;
+
+	dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n",
+		 ctx->total, ctx->digcnt, (ctx->flags & SHA_FLAGS_FINUP) != 0);
+
+	err = (ctx->flags & SHA_FLAGS_CPU) ? atmel_sha_update_cpu(dd) :
+					     atmel_sha_update_dma_start(dd);
+
+	/* wait for dma completion before can take more data */
+	dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n",
+			err, ctx->digcnt);
+
+	return err;
+}
+
+/*
+ * Process a final request: pad whatever is buffered and send it as the
+ * last block, by DMA when at least ATMEL_SHA_DMA_THRESHOLD bytes were
+ * buffered before padding and with the CPU otherwise.
+ */
+static int atmel_sha_final_req(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	/* decided on the unpadded size */
+	int use_dma = ctx->bufcnt >= ATMEL_SHA_DMA_THRESHOLD;
+	int err;
+	int count;
+
+	atmel_sha_fill_padding(ctx, 0);
+	count = ctx->bufcnt;
+	ctx->bufcnt = 0;
+
+	if (use_dma)
+		err = atmel_sha_xmit_dma_map(dd, ctx, count, 1);
+	else
+		/* faster to handle last block with cpu */
+		err = atmel_sha_xmit_cpu(dd, ctx->buffer, count, 1);
+
+	dev_dbg(dd->dev, "final_req: err: %d\n", err);
+
+	return err;
+}
+
+/*
+ * Read the digest registers into ctx->digest: SHA1_DIGEST_SIZE bytes for a
+ * SHA1 request, SHA256_DIGEST_SIZE bytes otherwise.
+ */
+static void atmel_sha_copy_hash(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	u32 *words = (u32 *)ctx->digest;
+	int nwords, i;
+
+	if (likely(ctx->flags & SHA_FLAGS_SHA1))
+		nwords = SHA1_DIGEST_SIZE / sizeof(u32);
+	else
+		nwords = SHA256_DIGEST_SIZE / sizeof(u32);
+
+	for (i = 0; i < nwords; i++)
+		words[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
+}
+
+/*
+ * Copy the digest saved in the request context to req->result, if the
+ * caller supplied a result buffer.
+ */
+static void atmel_sha_copy_ready_hash(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	size_t digest_len;
+
+	if (!req->result)
+		return;
+
+	digest_len = likely(ctx->flags & SHA_FLAGS_SHA1) ?
+			SHA1_DIGEST_SIZE : SHA256_DIGEST_SIZE;
+
+	memcpy(req->result, ctx->digest, digest_len);
+}
+
+/*
+ * Deliver the digest to the caller if any data has been hashed.
+ * Always returns 0.
+ */
+static int atmel_sha_finish(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = ctx->dd;
+
+	if (ctx->digcnt)
+		atmel_sha_copy_ready_hash(req);
+
+	dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt,
+		ctx->bufcnt);
+
+	return 0;
+}
+
+/*
+ * Complete the current request: save the digest (or flag the error), mark
+ * the device idle, release the clock, call the completion callback and
+ * kick the tasklet so the next queued request gets started.
+ */
+static void atmel_sha_finish_req(struct ahash_request *req, int err)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = ctx->dd;
+	const unsigned long done_mask = SHA_FLAGS_BUSY | SHA_FLAGS_FINAL |
+			SHA_FLAGS_CPU | SHA_FLAGS_DMA_READY |
+			SHA_FLAGS_OUTPUT_READY;
+
+	if (err) {
+		ctx->flags |= SHA_FLAGS_ERROR;
+	} else {
+		atmel_sha_copy_hash(req);
+		if (dd->flags & SHA_FLAGS_FINAL)
+			err = atmel_sha_finish(req);
+	}
+
+	/* atomic operation is not needed here */
+	dd->flags &= ~done_mask;
+
+	clk_disable_unprepare(dd->iclk);
+
+	if (req->base.complete)
+		req->base.complete(&req->base, err);
+
+	/* handle new request */
+	tasklet_schedule(&dd->done_task);
+}
+
+/*
+ * atmel_sha_hw_init() - enable the clock and, on first use, reset the block.
+ *
+ * The software reset and the dual-buffer probe run once per device, on the
+ * first call; SHA_FLAGS_INIT records that they have been done.
+ *
+ * Always returns 0. The clock enabled here is released again by
+ * atmel_sha_finish_req().
+ */
+static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
+{
+	clk_prepare_enable(dd->iclk);
+
+	/* one-time setup: only when the device has NOT been initialised yet */
+	if (!(SHA_FLAGS_INIT & dd->flags)) {
+		atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST);
+		atmel_sha_dualbuff_test(dd);
+		dd->flags |= SHA_FLAGS_INIT;
+		dd->err = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * atmel_sha_handle_queue() - enqueue a request and/or start the next one.
+ *
+ * @dd:  device
+ * @req: request to add to the queue, or NULL to only try to start the next
+ *       queued request (as done from the tasklet)
+ *
+ * If the device is idle, the next request is dequeued, the device is marked
+ * busy and processing is started according to the request's op.
+ *
+ * Returns the result of ahash_enqueue_request() for @req (0 when @req is
+ * NULL); errors of the request that gets started are reported through its
+ * completion callback, not through the return value.
+ */
+static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
+				  struct ahash_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct atmel_sha_reqctx *ctx;
+	unsigned long flags;
+	int err = 0, ret = 0;
+
+	/* queue manipulation and the BUSY test-and-set happen under dd->lock */
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = ahash_enqueue_request(&dd->queue, req);
+
+	/* a request is in flight; its completion will restart the queue */
+	if (SHA_FLAGS_BUSY & dd->flags) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = crypto_get_backlog(&dd->queue);
+	async_req = crypto_dequeue_request(&dd->queue);
+	if (async_req)
+		dd->flags |= SHA_FLAGS_BUSY;
+
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!async_req)
+		return ret;
+
+	/* tell a backlogged submitter that its request is now in progress */
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	req = ahash_request_cast(async_req);
+	dd->req = req;
+	ctx = ahash_request_ctx(req);
+
+	dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
+						ctx->op, req->nbytes);
+
+	err = atmel_sha_hw_init(dd);
+
+	if (err)
+		goto err1;
+
+	if (ctx->op == SHA_OP_UPDATE) {
+		err = atmel_sha_update_req(dd);
+		if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) {
+			/* no final() after finup() */
+			err = atmel_sha_final_req(dd);
+		}
+	} else if (ctx->op == SHA_OP_FINAL) {
+		err = atmel_sha_final_req(dd);
+	}
+
+err1:
+	if (err != -EINPROGRESS)
+		/* done_task will not finish it, so do it here */
+		atmel_sha_finish_req(req, err);
+
+	dev_dbg(dd->dev, "exit, err: %d\n", err);
+
+	return ret;
+}
+
+/*
+ * Record the operation to perform in the request context and hand the
+ * request to the queue of the device bound to its transform.
+ */
+static int atmel_sha_enqueue(struct ahash_request *req, unsigned int op)
+{
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+	struct atmel_sha_reqctx *rctx = ahash_request_ctx(req);
+
+	rctx->op = op;
+
+	return atmel_sha_handle_queue(tctx->dd, req);
+}
+
+/*
+ * ahash .update callback.
+ *
+ * Data that still fits in the bounce buffer is only copied there and 0 is
+ * returned; otherwise the request is queued for the hardware. Within a
+ * finup, requests below ATMEL_SHA_DMA_THRESHOLD bytes are flagged for the
+ * CPU path.
+ */
+static int atmel_sha_update(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	size_t pending;
+
+	if (!req->nbytes)
+		return 0;
+
+	ctx->total = req->nbytes;
+	ctx->sg = req->src;
+	ctx->offset = 0;
+
+	pending = ctx->bufcnt + ctx->total;
+
+	if (!(ctx->flags & SHA_FLAGS_FINUP)) {
+		if (pending < ctx->buflen) {
+			atmel_sha_append_sg(ctx);
+			return 0;
+		}
+	} else if (pending < ATMEL_SHA_DMA_THRESHOLD) {
+		/* faster to use CPU for short transfers */
+		ctx->flags |= SHA_FLAGS_CPU;
+	}
+
+	return atmel_sha_enqueue(req, SHA_OP_UPDATE);
+}
+
+/*
+ * atmel_sha_final() - ahash .final callback.
+ *
+ * If the message has already been padded and hashed, only the saved digest
+ * is copied out. Otherwise the request is queued as SHA_OP_FINAL, also when
+ * nothing is buffered: the hardware must only be driven from the queue
+ * handler, which serialises access to the device and sets dd->req before
+ * atmel_sha_final_req() uses it.
+ *
+ * Returns 0 when the digest is available immediately (or the request is
+ * already in error), otherwise the result of queueing the request.
+ */
+static int atmel_sha_final(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	if (ctx->flags & SHA_FLAGS_ERROR)
+		return 0; /* uncompleted hash is not needed */
+
+	if (!ctx->bufcnt && (ctx->flags & SHA_FLAGS_PAD))
+		/* copy ready hash (+ finalize hmac) */
+		return atmel_sha_finish(req);
+
+	/* buffered data and/or padding still has to go through the hardware */
+	return atmel_sha_enqueue(req, SHA_OP_FINAL);
+}
+
+/*
+ * ahash .finup callback: update followed by final. When the update was
+ * queued (-EINPROGRESS or -EBUSY) the final step is performed by the queue
+ * handler, so that status is returned right away.
+ */
+static int atmel_sha_finup(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int update_err, final_err;
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	update_err = atmel_sha_update(req);
+	switch (update_err) {
+	case -EINPROGRESS:
+	case -EBUSY:
+		return update_err;
+	}
+
+	/*
+	 * final() always has to run to clean up resources, even if
+	 * update() failed (the queued cases were handled above).
+	 */
+	final_err = atmel_sha_final(req);
+
+	return update_err ? update_err : final_err;
+}
+
+static int atmel_sha_digest(struct ahash_request *req)
+{
+	return atmel_sha_init(req) ?: atmel_sha_finup(req);
+}
+
+/*
+ * Transform constructor: allocate the software fallback for the algorithm
+ * and reserve request-context space for the bounce buffer. @alg_base is
+ * not used. Returns 0, or the error from the fallback allocation.
+ */
+static int atmel_sha_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
+{
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(tfm);
+	const char *alg_name = crypto_tfm_alg_name(tfm);
+	struct crypto_shash *fallback;
+
+	/* Allocate a fallback and abort if it failed. */
+	fallback = crypto_alloc_shash(alg_name, 0, CRYPTO_ALG_NEED_FALLBACK);
+	tctx->fallback = fallback;
+	if (IS_ERR(fallback)) {
+		pr_err("atmel-sha: fallback driver '%s' could not be loaded.\n",
+				alg_name);
+		return PTR_ERR(fallback);
+	}
+
+	/* request context is followed by the buffer plus room for padding */
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct atmel_sha_reqctx) +
+				 SHA_BUFFER_LEN + SHA256_BLOCK_SIZE);
+
+	return 0;
+}
+
+/* .cra_init hook shared by both algorithms; no algorithm base name is passed. */
+static int atmel_sha_cra_init(struct crypto_tfm *tfm)
+{
+	const char *no_alg_base = NULL;
+
+	return atmel_sha_cra_init_alg(tfm, no_alg_base);
+}
+
+/* .cra_exit hook: release the fallback allocated by the constructor. */
+static void atmel_sha_cra_exit(struct crypto_tfm *tfm)
+{
+	struct atmel_sha_ctx *tfm_ctx = crypto_tfm_ctx(tfm);
+	struct crypto_shash *fallback = tfm_ctx->fallback;
+
+	crypto_free_shash(fallback);
+	tfm_ctx->fallback = NULL;
+}
+
+/*
+ * Algorithms exposed by the driver: "sha1" and "sha256". Both entries share
+ * the same callbacks; atmel_sha_init() tells them apart by digest size.
+ */
+static struct ahash_alg sha_algs[] = {
+{
+	.init		= atmel_sha_init,
+	.update		= atmel_sha_update,
+	.final		= atmel_sha_final,
+	.finup		= atmel_sha_finup,
+	.digest		= atmel_sha_digest,
+	.halg = {
+		.digestsize	= SHA1_DIGEST_SIZE,
+		.base	= {
+			.cra_name		= "sha1",
+			.cra_driver_name	= "atmel-sha1",
+			.cra_priority		= 100,
+			.cra_flags		= CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize		= SHA1_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
+			.cra_alignmask		= 0,
+			.cra_module		= THIS_MODULE,
+			.cra_init		= atmel_sha_cra_init,
+			.cra_exit		= atmel_sha_cra_exit,
+		}
+	}
+},
+{
+	.init		= atmel_sha_init,
+	.update		= atmel_sha_update,
+	.final		= atmel_sha_final,
+	.finup		= atmel_sha_finup,
+	.digest		= atmel_sha_digest,
+	.halg = {
+		.digestsize	= SHA256_DIGEST_SIZE,
+		.base	= {
+			.cra_name		= "sha256",
+			.cra_driver_name	= "atmel-sha256",
+			.cra_priority		= 100,
+			.cra_flags		= CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize		= SHA256_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
+			.cra_alignmask		= 0,
+			.cra_module		= THIS_MODULE,
+			.cra_init		= atmel_sha_cra_init,
+			.cra_exit		= atmel_sha_cra_exit,
+		}
+	}
+},
+};
+
+/*
+ * atmel_sha_done_task() - tasklet run after an interrupt or after a request
+ * has been completed.
+ *
+ * @data: the struct atmel_sha_dev, cast to unsigned long
+ *
+ * When the device is idle the next queued request is started. Otherwise the
+ * flags set by the interrupt handler decide what happens: a CPU transfer is
+ * finished as soon as its output is ready; a DMA transfer is unmapped and
+ * the next chunk started, the request being finished once no further
+ * transfer is in progress or an error was recorded.
+ */
+static void atmel_sha_done_task(unsigned long data)
+{
+	struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data;
+	int err = 0;
+
+	/* idle: scheduled by atmel_sha_finish_req() to restart the queue */
+	if (!(SHA_FLAGS_BUSY & dd->flags)) {
+		atmel_sha_handle_queue(dd, NULL);
+		return;
+	}
+
+	if (SHA_FLAGS_CPU & dd->flags) {
+		if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
+			dd->flags &= ~SHA_FLAGS_OUTPUT_READY;
+			goto finish;
+		}
+	} else if (SHA_FLAGS_DMA_READY & dd->flags) {
+		if (SHA_FLAGS_DMA_ACTIVE & dd->flags) {
+			dd->flags &= ~SHA_FLAGS_DMA_ACTIVE;
+			atmel_sha_update_dma_stop(dd);
+			if (dd->err) {
+				err = dd->err;
+				goto finish;
+			}
+		}
+		if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
+			/* hash or semi-hash ready */
+			dd->flags &= ~(SHA_FLAGS_DMA_READY |
+						SHA_FLAGS_OUTPUT_READY);
+			/* anything but -EINPROGRESS means no transfer is pending */
+			err = atmel_sha_update_dma_start(dd);
+			if (err != -EINPROGRESS)
+				goto finish;
+		}
+	}
+	return;
+
+finish:
+	/* finish curent request */
+	atmel_sha_finish_req(dd->req, err);
+}
+
+/*
+ * Interrupt handler. Interrupts that are both pending and enabled are
+ * masked through SHA_IDR; if a request is in flight the ready flags are set
+ * and the tasklet is scheduled. Returns IRQ_NONE when none of the pending
+ * sources is enabled.
+ */
+static irqreturn_t atmel_sha_irq(int irq, void *dev_id)
+{
+	struct atmel_sha_dev *sha_dd = dev_id;
+	u32 status, mask;
+
+	status = atmel_sha_read(sha_dd, SHA_ISR);
+	mask = atmel_sha_read(sha_dd, SHA_IMR);
+	if (!(status & mask))
+		return IRQ_NONE;
+
+	atmel_sha_write(sha_dd, SHA_IDR, status);
+
+	if (!(SHA_FLAGS_BUSY & sha_dd->flags)) {
+		dev_warn(sha_dd->dev, "SHA interrupt when no active requests.\n");
+		return IRQ_HANDLED;
+	}
+
+	sha_dd->flags |= SHA_FLAGS_OUTPUT_READY;
+	if (!(SHA_FLAGS_CPU & sha_dd->flags))
+		sha_dd->flags |= SHA_FLAGS_DMA_READY;
+	tasklet_schedule(&sha_dd->done_task);
+
+	return IRQ_HANDLED;
+}
+
+/* Unregister every entry of sha_algs[], in array order. @dd is not used. */
+static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd)
+{
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(sha_algs)) {
+		crypto_unregister_ahash(&sha_algs[idx]);
+		idx++;
+	}
+}
+
+/*
+ * Register every entry of sha_algs[]. On failure the entries registered so
+ * far are unregistered again and the error is returned. @dd is not used.
+ */
+static int atmel_sha_register_algs(struct atmel_sha_dev *dd)
+{
+	int err, i, j;
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++) {
+		err = crypto_register_ahash(&sha_algs[i]);
+		if (!err)
+			continue;
+
+		/* roll back what was registered before the failing entry */
+		for (j = 0; j < i; j++)
+			crypto_unregister_ahash(&sha_algs[j]);
+
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * atmel_sha_probe() - platform driver probe callback.
+ *
+ * Allocates and initialises the device structure, looks up the MEM and IRQ
+ * resources, takes the clock, maps the registers, installs the interrupt
+ * handler, adds the device to the global list and registers the algorithms.
+ *
+ * The handler is installed only after the registers are mapped: the line is
+ * requested IRQF_SHARED, so atmel_sha_irq() may run, and read registers
+ * through io_base, as soon as request_irq() returns.
+ *
+ * Returns 0 on success or a negative errno; on failure everything acquired
+ * so far is released.
+ */
+static int __devinit atmel_sha_probe(struct platform_device *pdev)
+{
+	struct atmel_sha_dev *sha_dd;
+	struct device *dev = &pdev->dev;
+	struct resource *sha_res;
+	unsigned long sha_phys_size;
+	int err;
+
+	sha_dd = kzalloc(sizeof(struct atmel_sha_dev), GFP_KERNEL);
+	if (sha_dd == NULL) {
+		dev_err(dev, "unable to alloc data struct.\n");
+		err = -ENOMEM;
+		goto sha_dd_err;
+	}
+
+	sha_dd->dev = dev;
+
+	platform_set_drvdata(pdev, sha_dd);
+
+	INIT_LIST_HEAD(&sha_dd->list);
+
+	/* taken in atmel_sha_handle_queue(); zeroed memory is not enough */
+	spin_lock_init(&sha_dd->lock);
+
+	tasklet_init(&sha_dd->done_task, atmel_sha_done_task,
+					(unsigned long)sha_dd);
+
+	crypto_init_queue(&sha_dd->queue, ATMEL_SHA_QUEUE_LENGTH);
+
+	sha_dd->irq = -1;
+
+	/* Get the base address */
+	sha_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!sha_res) {
+		dev_err(dev, "no MEM resource info\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+	sha_dd->phys_base = sha_res->start;
+	sha_phys_size = resource_size(sha_res);
+
+	/* Get the IRQ */
+	sha_dd->irq = platform_get_irq(pdev,  0);
+	if (sha_dd->irq < 0) {
+		dev_err(dev, "no IRQ resource info\n");
+		err = sha_dd->irq;
+		goto res_err;
+	}
+
+	/* Initializing the clock */
+	sha_dd->iclk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(sha_dd->iclk)) {
+		dev_err(dev, "clock intialization failed.\n");
+		err = PTR_ERR(sha_dd->iclk);
+		goto res_err;
+	}
+
+	sha_dd->io_base = ioremap(sha_dd->phys_base, sha_phys_size);
+	if (!sha_dd->io_base) {
+		dev_err(dev, "can't ioremap\n");
+		err = -ENOMEM;
+		goto sha_io_err;
+	}
+
+	/* last resource: from here on the handler can be called */
+	err = request_irq(sha_dd->irq, atmel_sha_irq, IRQF_SHARED, "atmel-sha",
+						sha_dd);
+	if (err) {
+		dev_err(dev, "unable to request sha irq.\n");
+		goto irq_err;
+	}
+
+	spin_lock(&atmel_sha.lock);
+	list_add_tail(&sha_dd->list, &atmel_sha.dev_list);
+	spin_unlock(&atmel_sha.lock);
+
+	err = atmel_sha_register_algs(sha_dd);
+	if (err)
+		goto err_algs;
+
+	dev_info(dev, "Atmel SHA1/SHA256\n");
+
+	return 0;
+
+	/* unwind in reverse order of acquisition */
+err_algs:
+	spin_lock(&atmel_sha.lock);
+	list_del(&sha_dd->list);
+	spin_unlock(&atmel_sha.lock);
+	free_irq(sha_dd->irq, sha_dd);
+irq_err:
+	iounmap(sha_dd->io_base);
+sha_io_err:
+	clk_put(sha_dd->iclk);
+res_err:
+	tasklet_kill(&sha_dd->done_task);
+	kfree(sha_dd);
+	sha_dd = NULL;
+sha_dd_err:
+	dev_err(dev, "initialization failed.\n");
+
+	return err;
+}
+
+/*
+ * atmel_sha_remove() - platform driver remove callback.
+ *
+ * Takes the device off the global list, unregisters the algorithms and
+ * releases the IRQ, tasklet, register mapping, clock reference and the
+ * device structure itself.
+ *
+ * The interrupt is released before the tasklet is killed and before the
+ * registers are unmapped: atmel_sha_irq() reads device registers and
+ * schedules done_task, so it must no longer be able to run once either of
+ * them has been torn down.
+ *
+ * Returns 0 on success, -ENODEV when no driver data is attached to @pdev.
+ */
+static int __devexit atmel_sha_remove(struct platform_device *pdev)
+{
+	/* Plain automatic variable: a static here would be shared by all devices */
+	struct atmel_sha_dev *sha_dd = platform_get_drvdata(pdev);
+
+	if (!sha_dd)
+		return -ENODEV;
+
+	spin_lock(&atmel_sha.lock);
+	list_del(&sha_dd->list);
+	spin_unlock(&atmel_sha.lock);
+
+	atmel_sha_unregister_algs(sha_dd);
+
+	/* Silence the handler first so nothing can reschedule the tasklet */
+	if (sha_dd->irq >= 0)
+		free_irq(sha_dd->irq, sha_dd);
+
+	tasklet_kill(&sha_dd->done_task);
+
+	iounmap(sha_dd->io_base);
+
+	clk_put(sha_dd->iclk);
+
+	kfree(sha_dd);
+
+	return 0;
+}
+
+/* Platform driver descriptor: probe/remove callbacks under the name "atmel_sha". */
+static struct platform_driver atmel_sha_driver = {
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "atmel_sha",
+	},
+	.remove		= __devexit_p(atmel_sha_remove),
+	.probe		= atmel_sha_probe,
+};
+
+/* Module entry/exit boilerplate for atmel_sha_driver, plus module metadata. */
+module_platform_driver(atmel_sha_driver);
+
+MODULE_DESCRIPTION("Atmel SHA1/SHA256 hw acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique");
-- 
1.7.7.6

WARNING: multiple messages have this Message-ID (diff)
From: "Eric Bénard" <eric@eukrea.com>
To: linux-kernel@vger.kernel.org
Cc: nicolas.ferre@atmel.com, linux-arm-kernel@lists.infradead.org,
	linux-crypto@vger.kernel.org, herbert@gondor.hengli.com.au,
	davem@davemloft.net, plagnioj@jcrosoft.com, nicolas@eukrea.com,
	eric@eukrea.com
Subject: [PATCH 4/5] crypto: add Atmel SHA1/SHA256 driver
Date: Sun,  1 Jul 2012 19:19:46 +0200	[thread overview]
Message-ID: <1341163187-14946-5-git-send-email-eric@eukrea.com> (raw)
In-Reply-To: <1341163187-14946-1-git-send-email-eric@eukrea.com>

From: Nicolas Royer <nicolas@eukrea.com>

Signed-off-by: Nicolas Royer <nicolas@eukrea.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Acked-by: Eric Bénard <eric@eukrea.com>
Tested-by: Eric Bénard <eric@eukrea.com>
---
 drivers/crypto/Kconfig          |   14 +
 drivers/crypto/Makefile         |    1 +
 drivers/crypto/atmel-sha-regs.h |   46 ++
 drivers/crypto/atmel-sha.c      | 1112 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 1173 insertions(+), 0 deletions(-)
 create mode 100644 drivers/crypto/atmel-sha-regs.h
 create mode 100644 drivers/crypto/atmel-sha.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 9ac7128..631014b 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -358,4 +358,18 @@ config CRYPTO_DEV_ATMEL_TDES
 	  To compile this driver as a module, choose M here: the module
 	  will be called atmel-tdes.
 
+config CRYPTO_DEV_ATMEL_SHA
+	tristate "Support for Atmel SHA1/SHA256 hw accelerator"
+	depends on ARCH_AT91
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_ALGAPI
+	help
+	  Some Atmel processors have SHA1/SHA256 hw accelerator.
+	  Select this if you want to use the Atmel module for
+	  SHA1/SHA256 algorithms.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called atmel-sha.
+
 endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 211fdc2..387bee1 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
 obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_AES) += atmel-aes.o
 obj-$(CONFIG_CRYPTO_DEV_ATMEL_TDES) += atmel-tdes.o
+obj-$(CONFIG_CRYPTO_DEV_ATMEL_SHA) += atmel-sha.o
diff --git a/drivers/crypto/atmel-sha-regs.h b/drivers/crypto/atmel-sha-regs.h
new file mode 100644
index 0000000..dc53a20
--- /dev/null
+++ b/drivers/crypto/atmel-sha-regs.h
@@ -0,0 +1,46 @@
+#ifndef __ATMEL_SHA_REGS_H__
+#define __ATMEL_SHA_REGS_H__
+
+#define SHA_REG_DIGEST(x)		(0x80 + ((x) * 0x04))
+#define SHA_REG_DIN(x)			(0x40 + ((x) * 0x04))
+
+#define SHA_CR				0x00
+#define SHA_CR_START			(1 << 0)
+#define SHA_CR_FIRST			(1 << 4)
+#define SHA_CR_SWRST			(1 << 8)
+
+#define SHA_MR				0x04
+#define SHA_MR_MODE_MASK		(0x3 << 0)
+#define SHA_MR_MODE_MANUAL		0x0
+#define SHA_MR_MODE_AUTO		0x1
+#define SHA_MR_MODE_PDC			0x2
+#define	SHA_MR_DUALBUFF			(1 << 3)
+#define SHA_MR_PROCDLY			(1 << 4)
+#define SHA_MR_ALGO_SHA1		(0 << 8)
+#define SHA_MR_ALGO_SHA256		(1 << 8)
+
+#define SHA_IER				0x10
+#define SHA_IDR				0x14
+#define SHA_IMR				0x18
+#define SHA_ISR				0x1C
+#define SHA_INT_DATARDY			(1 << 0)
+#define SHA_INT_ENDTX			(1 << 1)
+#define SHA_INT_TXBUFE			(1 << 2)
+#define SHA_INT_URAD			(1 << 8)
+#define SHA_ISR_URAT_MASK		(0x7 << 12)
+#define SHA_ISR_URAT_IDR		(0x0 << 12)
+#define SHA_ISR_URAT_ODR		(0x1 << 12)
+#define SHA_ISR_URAT_MR			(0x2 << 12)
+#define SHA_ISR_URAT_WO			(0x5 << 12)
+
+#define SHA_TPR				0x108
+#define SHA_TCR				0x10C
+#define SHA_TNPR			0x118
+#define SHA_TNCR			0x11C
+#define SHA_PTCR			0x120
+#define SHA_PTCR_TXTEN		(1 << 8)
+#define SHA_PTCR_TXTDIS		(1 << 9)
+#define SHA_PTSR			0x124
+#define SHA_PTSR_TXTEN		(1 << 8)
+
+#endif /* __ATMEL_SHA_REGS_H__ */
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
new file mode 100644
index 0000000..f938b9d
--- /dev/null
+++ b/drivers/crypto/atmel-sha.c
@@ -0,0 +1,1112 @@
+/*
+ * Cryptographic API.
+ *
+ * Support for ATMEL SHA1/SHA256 HW acceleration.
+ *
+ * Copyright (c) 2012 Eukréa Electromatique - ATMEL
+ * Author: Nicolas Royer <nicolas@eukrea.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Some ideas are from omap-sham.c drivers.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/hw_random.h>
+#include <linux/platform_device.h>
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/algapi.h>
+#include <crypto/sha.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include "atmel-sha-regs.h"
+
+/* SHA flags */
+#define SHA_FLAGS_BUSY			BIT(0)
+#define	SHA_FLAGS_FINAL			BIT(1)
+#define SHA_FLAGS_DMA_ACTIVE	BIT(2)
+#define SHA_FLAGS_OUTPUT_READY	BIT(3)
+#define SHA_FLAGS_INIT			BIT(4)
+#define SHA_FLAGS_CPU			BIT(5)
+#define SHA_FLAGS_DMA_READY		BIT(6)
+
+#define SHA_FLAGS_FINUP		BIT(16)
+#define SHA_FLAGS_SG		BIT(17)
+#define SHA_FLAGS_SHA1		BIT(18)
+#define SHA_FLAGS_SHA256	BIT(19)
+#define SHA_FLAGS_ERROR		BIT(20)
+#define SHA_FLAGS_PAD		BIT(21)
+
+#define SHA_FLAGS_DUALBUFF	BIT(24)
+
+#define SHA_OP_UPDATE	1
+#define SHA_OP_FINAL	2
+
+#define SHA_BUFFER_LEN		PAGE_SIZE
+
+#define ATMEL_SHA_DMA_THRESHOLD		56
+
+
+struct atmel_sha_dev;
+
+struct atmel_sha_reqctx {
+	struct atmel_sha_dev	*dd;
+	unsigned long	flags;
+	unsigned long	op;
+
+	u8	digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
+	size_t	digcnt;
+	size_t	bufcnt;
+	size_t	buflen;
+	dma_addr_t	dma_addr;
+
+	/* walk state */
+	struct scatterlist	*sg;
+	unsigned int	offset;	/* offset in current sg */
+	unsigned int	total;	/* total request */
+
+	u8	buffer[0] __aligned(sizeof(u32));
+};
+
+struct atmel_sha_ctx {
+	struct atmel_sha_dev	*dd;
+
+	unsigned long		flags;
+
+	/* fallback stuff */
+	struct crypto_shash	*fallback;
+
+};
+
+#define ATMEL_SHA_QUEUE_LENGTH	1
+
+struct atmel_sha_dev {
+	struct list_head	list;
+	unsigned long		phys_base;
+	struct device		*dev;
+	struct clk			*iclk;
+	int					irq;
+	void __iomem		*io_base;
+
+	spinlock_t		lock;
+	int			err;
+	struct tasklet_struct	done_task;
+
+	unsigned long		flags;
+	struct crypto_queue	queue;
+	struct ahash_request	*req;
+};
+
+struct atmel_sha_drv {
+	struct list_head	dev_list;
+	spinlock_t		lock;
+};
+
+static struct atmel_sha_drv atmel_sha = {
+	.dev_list = LIST_HEAD_INIT(atmel_sha.dev_list),
+	.lock = __SPIN_LOCK_UNLOCKED(atmel_sha.lock),
+};
+
+static inline u32 atmel_sha_read(struct atmel_sha_dev *dd, u32 offset)
+{
+	return readl_relaxed(dd->io_base + offset);
+}
+
+static inline void atmel_sha_write(struct atmel_sha_dev *dd,
+					u32 offset, u32 value)
+{
+	writel_relaxed(value, dd->io_base + offset);
+}
+
+static void atmel_sha_dualbuff_test(struct atmel_sha_dev *dd)
+{
+	atmel_sha_write(dd, SHA_MR, SHA_MR_DUALBUFF);
+
+	if (atmel_sha_read(dd, SHA_MR) & SHA_MR_DUALBUFF)
+		dd->flags |= SHA_FLAGS_DUALBUFF;
+}
+
+static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx)
+{
+	size_t count;
+
+	while ((ctx->bufcnt < ctx->buflen) && ctx->total) {
+		count = min(ctx->sg->length - ctx->offset, ctx->total);
+		count = min(count, ctx->buflen - ctx->bufcnt);
+
+		if (count <= 0)
+			break;
+
+		scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
+			ctx->offset, count, 0);
+
+		ctx->bufcnt += count;
+		ctx->offset += count;
+		ctx->total -= count;
+
+		if (ctx->offset == ctx->sg->length) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+			else
+				ctx->total = 0;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * The purpose of this padding is to ensure that the padded message
+ * is a multiple of 512 bits. A 0x80 byte (a "1" bit followed by zeros)
+ * is appended to the message, followed by "padlen-1" zero bytes. Then a
+ * 64-bit big-endian block holding the message length in bits is appended.
+ *
+ * padlen is calculated as follows, with index = buffered bytes modulo 64:
+ *  - if index < 56 bytes then padlen = 56 - index
+ *  - else padlen = 64 + 56 - index
+ */
+static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length)
+{
+	unsigned int index, padlen;
+	u64 bits;
+	u64 size;
+
+	bits = (ctx->bufcnt + ctx->digcnt + length) << 3;
+	size = cpu_to_be64(bits);
+
+	index = ctx->bufcnt & 0x3f;
+	padlen = (index < 56) ? (56 - index) : ((64+56) - index);
+	*(ctx->buffer + ctx->bufcnt) = 0x80;
+	memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1);
+	memcpy(ctx->buffer + ctx->bufcnt + padlen, &size, 8);
+	ctx->bufcnt += padlen + 8;
+	ctx->flags |= SHA_FLAGS_PAD;
+}
+
+static int atmel_sha_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = NULL;
+	struct atmel_sha_dev *tmp;
+
+	spin_lock_bh(&atmel_sha.lock);
+	if (!tctx->dd) {
+		list_for_each_entry(tmp, &atmel_sha.dev_list, list) {
+			dd = tmp;
+			break;
+		}
+		tctx->dd = dd;
+	} else {
+		dd = tctx->dd;
+	}
+
+	spin_unlock_bh(&atmel_sha.lock);
+
+	ctx->dd = dd;
+
+	ctx->flags = 0;
+
+	dev_dbg(dd->dev, "init: digest size: %d\n",
+		crypto_ahash_digestsize(tfm));
+
+	if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE)
+		ctx->flags |= SHA_FLAGS_SHA1;
+	else if (crypto_ahash_digestsize(tfm) == SHA256_DIGEST_SIZE)
+		ctx->flags |= SHA_FLAGS_SHA256;
+
+	ctx->bufcnt = 0;
+	ctx->digcnt = 0;
+	ctx->buflen = SHA_BUFFER_LEN;
+
+	return 0;
+}
+
+static void atmel_sha_write_ctrl(struct atmel_sha_dev *dd, int dma)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	u32 valcr = 0, valmr = SHA_MR_MODE_AUTO;
+
+	if (likely(dma)) {
+		atmel_sha_write(dd, SHA_IER, SHA_INT_TXBUFE);
+		valmr = SHA_MR_MODE_PDC;
+		if (dd->flags & SHA_FLAGS_DUALBUFF)
+			valmr = SHA_MR_DUALBUFF;
+	} else {
+		atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY);
+	}
+
+	if (ctx->flags & SHA_FLAGS_SHA256)
+		valmr |= SHA_MR_ALGO_SHA256;
+
+	/* Setting CR_FIRST only for the first iteration */
+	if (!ctx->digcnt)
+		valcr = SHA_CR_FIRST;
+
+	atmel_sha_write(dd, SHA_CR, valcr);
+	atmel_sha_write(dd, SHA_MR, valmr);
+}
+
+static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf,
+			      size_t length, int final)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int count, len32;
+	const u32 *buffer = (const u32 *)buf;
+
+	dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length, final);
+
+	atmel_sha_write_ctrl(dd, 0);
+
+	/* should be non-zero before next lines to disable clocks later */
+	ctx->digcnt += length;
+
+	if (final)
+		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
+
+	len32 = DIV_ROUND_UP(length, sizeof(u32));
+
+	dd->flags |= SHA_FLAGS_CPU;
+
+	for (count = 0; count < len32; count++)
+		atmel_sha_write(dd, SHA_REG_DIN(count), buffer[count]);
+
+	return -EINPROGRESS;
+}
+
+static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
+		size_t length1, dma_addr_t dma_addr2, size_t length2, int final)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int len32;
+
+	dev_dbg(dd->dev, "xmit_pdc: digcnt: %d, length: %d, final: %d\n",
+						ctx->digcnt, length1, final);
+
+	len32 = DIV_ROUND_UP(length1, sizeof(u32));
+	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTDIS);
+	atmel_sha_write(dd, SHA_TPR, dma_addr1);
+	atmel_sha_write(dd, SHA_TCR, len32);
+
+	len32 = DIV_ROUND_UP(length2, sizeof(u32));
+	atmel_sha_write(dd, SHA_TNPR, dma_addr2);
+	atmel_sha_write(dd, SHA_TNCR, len32);
+
+	atmel_sha_write_ctrl(dd, 1);
+
+	/* should be non-zero before next lines to disable clocks later */
+	ctx->digcnt += length1;
+
+	if (final)
+		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
+
+	dd->flags |=  SHA_FLAGS_DMA_ACTIVE;
+
+	/* Start DMA transfer */
+	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTEN);
+
+	return -EINPROGRESS;
+}
+
+static int atmel_sha_update_cpu(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	int bufcnt;
+
+	atmel_sha_append_sg(ctx);
+	atmel_sha_fill_padding(ctx, 0);
+
+	bufcnt = ctx->bufcnt;
+	ctx->bufcnt = 0;
+
+	return atmel_sha_xmit_cpu(dd, ctx->buffer, bufcnt, 1);
+}
+
+static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd,
+					struct atmel_sha_reqctx *ctx,
+					size_t length, int final)
+{
+	ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
+				ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+		dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen +
+				SHA1_BLOCK_SIZE);
+		return -EINVAL;
+	}
+
+	ctx->flags &= ~SHA_FLAGS_SG;
+
+	/* next call does not fail... so no unmap in the case of error */
+	return atmel_sha_xmit_pdc(dd, ctx->dma_addr, length, 0, 0, final);
+}
+
+static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int final;
+	size_t count;
+
+	atmel_sha_append_sg(ctx);
+
+	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
+
+	dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n",
+					 ctx->bufcnt, ctx->digcnt, final);
+
+	if (final)
+		atmel_sha_fill_padding(ctx, 0);
+
+	if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		return atmel_sha_xmit_dma_map(dd, ctx, count, final);
+	}
+
+	return 0;
+}
+
+static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+	unsigned int length, final, tail;
+	struct scatterlist *sg;
+	unsigned int count;
+
+	if (!ctx->total)
+		return 0;
+
+	if (ctx->bufcnt || ctx->offset)
+		return atmel_sha_update_dma_slow(dd);
+
+	dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n",
+			ctx->digcnt, ctx->bufcnt, ctx->total);
+
+	sg = ctx->sg;
+
+	if (!IS_ALIGNED(sg->offset, sizeof(u32)))
+		return atmel_sha_update_dma_slow(dd);
+
+	if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, SHA1_BLOCK_SIZE))
+		/* size is not SHA1_BLOCK_SIZE aligned */
+		return atmel_sha_update_dma_slow(dd);
+
+	length = min(ctx->total, sg->length);
+
+	if (sg_is_last(sg)) {
+		if (!(ctx->flags & SHA_FLAGS_FINUP)) {
+			/* not last sg must be SHA1_BLOCK_SIZE aligned */
+			tail = length & (SHA1_BLOCK_SIZE - 1);
+			length -= tail;
+			if (length == 0) {
+				/* offset where to start slow */
+				ctx->offset = length;
+				return atmel_sha_update_dma_slow(dd);
+			}
+		}
+	}
+
+	ctx->total -= length;
+	ctx->offset = length; /* offset where to start slow */
+
+	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
+
+	/* Add padding */
+	if (final) {
+		tail = length & (SHA1_BLOCK_SIZE - 1);
+		length -= tail;
+		ctx->total += tail;
+		ctx->offset = length; /* offset where to start slow */
+
+		sg = ctx->sg;
+		atmel_sha_append_sg(ctx);
+
+		atmel_sha_fill_padding(ctx, length);
+
+		ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
+			ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+		if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
+			dev_err(dd->dev, "dma %u bytes error\n",
+				ctx->buflen + SHA1_BLOCK_SIZE);
+			return -EINVAL;
+		}
+
+		if (length == 0) {
+			ctx->flags &= ~SHA_FLAGS_SG;
+			count = ctx->bufcnt;
+			ctx->bufcnt = 0;
+			return atmel_sha_xmit_pdc(dd, ctx->dma_addr, count, 0,
+					0, final);
+		} else {
+			ctx->sg = sg;
+			if (!dma_map_sg(dd->dev, ctx->sg, 1,
+				DMA_TO_DEVICE)) {
+					dev_err(dd->dev, "dma_map_sg  error\n");
+					return -EINVAL;
+			}
+
+			ctx->flags |= SHA_FLAGS_SG;
+
+			count = ctx->bufcnt;
+			ctx->bufcnt = 0;
+			return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg),
+					length, ctx->dma_addr, count, final);
+		}
+	}
+
+	if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
+		dev_err(dd->dev, "dma_map_sg  error\n");
+		return -EINVAL;
+	}
+
+	ctx->flags |= SHA_FLAGS_SG;
+
+	/* next call does not fail... so no unmap in the case of error */
+	return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg), length, 0,
+								0, final);
+}
+
+static int atmel_sha_update_dma_stop(struct atmel_sha_dev *dd)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
+
+	if (ctx->flags & SHA_FLAGS_SG) {
+		dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
+		if (ctx->sg->length == ctx->offset) {
+			ctx->sg = sg_next(ctx->sg);
+			if (ctx->sg)
+				ctx->offset = 0;
+		}
+		if (ctx->flags & SHA_FLAGS_PAD)
+			dma_unmap_single(dd->dev, ctx->dma_addr,
+				ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	} else {
+		dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen +
+						SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
+	}
+
+	return 0;
+}
+
+static int atmel_sha_update_req(struct atmel_sha_dev *dd)
+{
+	struct ahash_request *req = dd->req;
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err;
+
+	dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n",
+		 ctx->total, ctx->digcnt, (ctx->flags & SHA_FLAGS_FINUP) != 0);
+
+	if (ctx->flags & SHA_FLAGS_CPU)
+		err = atmel_sha_update_cpu(dd);
+	else
+		err = atmel_sha_update_dma_start(dd);
+
+	/* wait for dma completion before can take more data */
+	dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n",
+			err, ctx->digcnt);
+
+	return err;
+}
+
+static int atmel_sha_final_req(struct atmel_sha_dev *dd)
+{
+	struct ahash_request *req = dd->req;
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err = 0;
+	int count;
+
+	if (ctx->bufcnt >= ATMEL_SHA_DMA_THRESHOLD) {
+		atmel_sha_fill_padding(ctx, 0);
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		err = atmel_sha_xmit_dma_map(dd, ctx, count, 1);
+	}
+	/* faster to handle last block with cpu */
+	else {
+		atmel_sha_fill_padding(ctx, 0);
+		count = ctx->bufcnt;
+		ctx->bufcnt = 0;
+		err = atmel_sha_xmit_cpu(dd, ctx->buffer, count, 1);
+	}
+
+	dev_dbg(dd->dev, "final_req: err: %d\n", err);
+
+	return err;
+}
+
+static void atmel_sha_copy_hash(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	u32 *hash = (u32 *)ctx->digest;
+	int i;
+
+	if (likely(ctx->flags & SHA_FLAGS_SHA1))
+		for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++)
+			hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
+	else
+		for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(u32); i++)
+			hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
+}
+
+static void atmel_sha_copy_ready_hash(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	if (!req->result)
+		return;
+
+	if (likely(ctx->flags & SHA_FLAGS_SHA1))
+		memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE);
+	else
+		memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE);
+}
+
+static int atmel_sha_finish(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = ctx->dd;
+	int err = 0;
+
+	if (ctx->digcnt)
+		atmel_sha_copy_ready_hash(req);
+
+	dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt,
+		ctx->bufcnt);
+
+	return err;
+}
+
+static void atmel_sha_finish_req(struct ahash_request *req, int err)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_dev *dd = ctx->dd;
+
+	if (!err) {
+		atmel_sha_copy_hash(req);
+		if (SHA_FLAGS_FINAL & dd->flags)
+			err = atmel_sha_finish(req);
+	} else {
+		ctx->flags |= SHA_FLAGS_ERROR;
+	}
+
+	/* atomic operation is not needed here */
+	dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU |
+			SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY);
+
+	clk_disable_unprepare(dd->iclk);
+
+	if (req->base.complete)
+		req->base.complete(&req->base, err);
+
+	/* handle new request */
+	tasklet_schedule(&dd->done_task);
+}
+
+static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
+{
+	clk_prepare_enable(dd->iclk);
+
+	if (SHA_FLAGS_INIT & dd->flags) {
+		atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST);
+		atmel_sha_dualbuff_test(dd);
+		dd->flags |= SHA_FLAGS_INIT;
+		dd->err = 0;
+	}
+
+	return 0;
+}
+
+static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
+				  struct ahash_request *req)
+{
+	struct crypto_async_request *async_req, *backlog;
+	struct atmel_sha_reqctx *ctx;
+	unsigned long flags;
+	int err = 0, ret = 0;
+
+	spin_lock_irqsave(&dd->lock, flags);
+	if (req)
+		ret = ahash_enqueue_request(&dd->queue, req);
+
+	if (SHA_FLAGS_BUSY & dd->flags) {
+		spin_unlock_irqrestore(&dd->lock, flags);
+		return ret;
+	}
+
+	backlog = crypto_get_backlog(&dd->queue);
+	async_req = crypto_dequeue_request(&dd->queue);
+	if (async_req)
+		dd->flags |= SHA_FLAGS_BUSY;
+
+	spin_unlock_irqrestore(&dd->lock, flags);
+
+	if (!async_req)
+		return ret;
+
+	if (backlog)
+		backlog->complete(backlog, -EINPROGRESS);
+
+	req = ahash_request_cast(async_req);
+	dd->req = req;
+	ctx = ahash_request_ctx(req);
+
+	dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
+						ctx->op, req->nbytes);
+
+	err = atmel_sha_hw_init(dd);
+
+	if (err)
+		goto err1;
+
+	if (ctx->op == SHA_OP_UPDATE) {
+		err = atmel_sha_update_req(dd);
+		if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) {
+			/* no final() after finup() */
+			err = atmel_sha_final_req(dd);
+		}
+	} else if (ctx->op == SHA_OP_FINAL) {
+		err = atmel_sha_final_req(dd);
+	}
+
+err1:
+	if (err != -EINPROGRESS)
+		/* done_task will not finish it, so do it here */
+		atmel_sha_finish_req(req, err);
+
+	dev_dbg(dd->dev, "exit, err: %d\n", err);
+
+	return ret;
+}
+
+static int atmel_sha_enqueue(struct ahash_request *req, unsigned int op)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+	struct atmel_sha_dev *dd = tctx->dd;
+
+	ctx->op = op;
+
+	return atmel_sha_handle_queue(dd, req);
+}
+
+static int atmel_sha_update(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+
+	if (!req->nbytes)
+		return 0;
+
+	ctx->total = req->nbytes;
+	ctx->sg = req->src;
+	ctx->offset = 0;
+
+	if (ctx->flags & SHA_FLAGS_FINUP) {
+		if (ctx->bufcnt + ctx->total < ATMEL_SHA_DMA_THRESHOLD)
+			/* faster to use CPU for short transfers */
+			ctx->flags |= SHA_FLAGS_CPU;
+	} else if (ctx->bufcnt + ctx->total < ctx->buflen) {
+		atmel_sha_append_sg(ctx);
+		return 0;
+	}
+	return atmel_sha_enqueue(req, SHA_OP_UPDATE);
+}
+
+static int atmel_sha_final(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
+	struct atmel_sha_dev *dd = tctx->dd;
+
+	int err = 0;
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	if (ctx->flags & SHA_FLAGS_ERROR)
+		return 0; /* uncompleted hash is not needed */
+
+	if (ctx->bufcnt) {
+		return atmel_sha_enqueue(req, SHA_OP_FINAL);
+	} else if (!(ctx->flags & SHA_FLAGS_PAD)) { /* add padding */
+		err = atmel_sha_hw_init(dd);
+		if (err)
+			goto err1;
+
+		dd->flags |= SHA_FLAGS_BUSY;
+		err = atmel_sha_final_req(dd);
+	} else {
+		/* copy ready hash (+ finalize hmac) */
+		return atmel_sha_finish(req);
+	}
+
+err1:
+	if (err != -EINPROGRESS)
+		/* done_task will not finish it, so do it here */
+		atmel_sha_finish_req(req, err);
+
+	return err;
+}
+
+static int atmel_sha_finup(struct ahash_request *req)
+{
+	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
+	int err1, err2;
+
+	ctx->flags |= SHA_FLAGS_FINUP;
+
+	err1 = atmel_sha_update(req);
+	if (err1 == -EINPROGRESS || err1 == -EBUSY)
+		return err1;
+
+	/*
+	 * final() has to be always called to cleanup resources
+	 * even if update() failed, except EINPROGRESS
+	 */
+	err2 = atmel_sha_final(req);
+
+	return err1 ?: err2;
+}
+
+static int atmel_sha_digest(struct ahash_request *req)
+{
+	return atmel_sha_init(req) ?: atmel_sha_finup(req);
+}
+
+static int atmel_sha_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
+{
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(tfm);
+	const char *alg_name = crypto_tfm_alg_name(tfm);
+
+	/* Allocate a fallback and abort if it failed. */
+	tctx->fallback = crypto_alloc_shash(alg_name, 0,
+					    CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(tctx->fallback)) {
+		pr_err("atmel-sha: fallback driver '%s' could not be loaded.\n",
+				alg_name);
+		return PTR_ERR(tctx->fallback);
+	}
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct atmel_sha_reqctx) +
+				 SHA_BUFFER_LEN + SHA256_BLOCK_SIZE);
+
+	return 0;
+}
+
+static int atmel_sha_cra_init(struct crypto_tfm *tfm)
+{
+	return atmel_sha_cra_init_alg(tfm, NULL);
+}
+
+static void atmel_sha_cra_exit(struct crypto_tfm *tfm)
+{
+	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_shash(tctx->fallback);
+	tctx->fallback = NULL;
+}
+
+static struct ahash_alg sha_algs[] = {
+{
+	.init		= atmel_sha_init,
+	.update		= atmel_sha_update,
+	.final		= atmel_sha_final,
+	.finup		= atmel_sha_finup,
+	.digest		= atmel_sha_digest,
+	.halg = {
+		.digestsize	= SHA1_DIGEST_SIZE,
+		.base	= {
+			.cra_name		= "sha1",
+			.cra_driver_name	= "atmel-sha1",
+			.cra_priority		= 100,
+			.cra_flags		= CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize		= SHA1_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
+			.cra_alignmask		= 0,
+			.cra_module		= THIS_MODULE,
+			.cra_init		= atmel_sha_cra_init,
+			.cra_exit		= atmel_sha_cra_exit,
+		}
+	}
+},
+{
+	.init		= atmel_sha_init,
+	.update		= atmel_sha_update,
+	.final		= atmel_sha_final,
+	.finup		= atmel_sha_finup,
+	.digest		= atmel_sha_digest,
+	.halg = {
+		.digestsize	= SHA256_DIGEST_SIZE,
+		.base	= {
+			.cra_name		= "sha256",
+			.cra_driver_name	= "atmel-sha256",
+			.cra_priority		= 100,
+			.cra_flags		= CRYPTO_ALG_ASYNC |
+						CRYPTO_ALG_NEED_FALLBACK,
+			.cra_blocksize		= SHA256_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
+			.cra_alignmask		= 0,
+			.cra_module		= THIS_MODULE,
+			.cra_init		= atmel_sha_cra_init,
+			.cra_exit		= atmel_sha_cra_exit,
+		}
+	}
+},
+};
+
+static void atmel_sha_done_task(unsigned long data)
+{
+	struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data;
+	int err = 0;
+
+	if (!(SHA_FLAGS_BUSY & dd->flags)) {
+		atmel_sha_handle_queue(dd, NULL);
+		return;
+	}
+
+	if (SHA_FLAGS_CPU & dd->flags) {
+		if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
+			dd->flags &= ~SHA_FLAGS_OUTPUT_READY;
+			goto finish;
+		}
+	} else if (SHA_FLAGS_DMA_READY & dd->flags) {
+		if (SHA_FLAGS_DMA_ACTIVE & dd->flags) {
+			dd->flags &= ~SHA_FLAGS_DMA_ACTIVE;
+			atmel_sha_update_dma_stop(dd);
+			if (dd->err) {
+				err = dd->err;
+				goto finish;
+			}
+		}
+		if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
+			/* hash or semi-hash ready */
+			dd->flags &= ~(SHA_FLAGS_DMA_READY |
+						SHA_FLAGS_OUTPUT_READY);
+			err = atmel_sha_update_dma_start(dd);
+			if (err != -EINPROGRESS)
+				goto finish;
+		}
+	}
+	return;
+
+finish:
+	/* finish current request */
+	atmel_sha_finish_req(dd->req, err);
+}
+
+static irqreturn_t atmel_sha_irq(int irq, void *dev_id)
+{
+	struct atmel_sha_dev *sha_dd = dev_id;
+	u32 reg;
+
+	reg = atmel_sha_read(sha_dd, SHA_ISR);
+	if (reg & atmel_sha_read(sha_dd, SHA_IMR)) {
+		atmel_sha_write(sha_dd, SHA_IDR, reg);
+		if (SHA_FLAGS_BUSY & sha_dd->flags) {
+			sha_dd->flags |= SHA_FLAGS_OUTPUT_READY;
+			if (!(SHA_FLAGS_CPU & sha_dd->flags))
+				sha_dd->flags |= SHA_FLAGS_DMA_READY;
+			tasklet_schedule(&sha_dd->done_task);
+		} else {
+			dev_warn(sha_dd->dev, "SHA interrupt when no active requests.\n");
+		}
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++)
+		crypto_unregister_ahash(&sha_algs[i]);
+}
+
+static int atmel_sha_register_algs(struct atmel_sha_dev *dd)
+{
+	int err, i, j;
+
+	for (i = 0; i < ARRAY_SIZE(sha_algs); i++) {
+		err = crypto_register_ahash(&sha_algs[i]);
+		if (err)
+			goto err_sha_algs;
+	}
+
+	return 0;
+
+err_sha_algs:
+	for (j = 0; j < i; j++)
+		crypto_unregister_ahash(&sha_algs[j]);
+
+	return err;
+}
+
+static int __devinit atmel_sha_probe(struct platform_device *pdev)
+{
+	struct atmel_sha_dev *sha_dd;
+	struct device *dev = &pdev->dev;
+	struct resource *sha_res;
+	unsigned long sha_phys_size;
+	int err;
+
+	sha_dd = kzalloc(sizeof(struct atmel_sha_dev), GFP_KERNEL);
+	if (sha_dd == NULL) {
+		dev_err(dev, "unable to alloc data struct.\n");
+		err = -ENOMEM;
+		goto sha_dd_err;
+	}
+
+	sha_dd->dev = dev;
+
+	platform_set_drvdata(pdev, sha_dd);
+
+	INIT_LIST_HEAD(&sha_dd->list);
+
+	tasklet_init(&sha_dd->done_task, atmel_sha_done_task,
+					(unsigned long)sha_dd);
+
+	crypto_init_queue(&sha_dd->queue, ATMEL_SHA_QUEUE_LENGTH);
+
+	sha_dd->irq = -1;
+
+	/* Get the base address */
+	sha_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!sha_res) {
+		dev_err(dev, "no MEM resource info\n");
+		err = -ENODEV;
+		goto res_err;
+	}
+	sha_dd->phys_base = sha_res->start;
+	sha_phys_size = resource_size(sha_res);
+
+	/* Get the IRQ */
+	sha_dd->irq = platform_get_irq(pdev,  0);
+	if (sha_dd->irq < 0) {
+		dev_err(dev, "no IRQ resource info\n");
+		err = sha_dd->irq;
+		goto res_err;
+	}
+
+	err = request_irq(sha_dd->irq, atmel_sha_irq, IRQF_SHARED, "atmel-sha",
+						sha_dd);
+	if (err) {
+		dev_err(dev, "unable to request sha irq.\n");
+		goto res_err;
+	}
+
+	/* Initializing the clock */
+	sha_dd->iclk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(sha_dd->iclk)) {
+		dev_err(dev, "clock intialization failed.\n");
+		err = PTR_ERR(sha_dd->iclk);
+		goto clk_err;
+	}
+
+	sha_dd->io_base = ioremap(sha_dd->phys_base, sha_phys_size);
+	if (!sha_dd->io_base) {
+		dev_err(dev, "can't ioremap\n");
+		err = -ENOMEM;
+		goto sha_io_err;
+	}
+
+	spin_lock(&atmel_sha.lock);
+	list_add_tail(&sha_dd->list, &atmel_sha.dev_list);
+	spin_unlock(&atmel_sha.lock);
+
+	err = atmel_sha_register_algs(sha_dd);
+	if (err)
+		goto err_algs;
+
+	dev_info(dev, "Atmel SHA1/SHA256\n");
+
+	return 0;
+
+err_algs:
+	spin_lock(&atmel_sha.lock);
+	list_del(&sha_dd->list);
+	spin_unlock(&atmel_sha.lock);
+	iounmap(sha_dd->io_base);
+sha_io_err:
+	clk_put(sha_dd->iclk);
+clk_err:
+	free_irq(sha_dd->irq, sha_dd);
+res_err:
+	tasklet_kill(&sha_dd->done_task);
+	kfree(sha_dd);
+	sha_dd = NULL;
+sha_dd_err:
+	dev_err(dev, "initialization failed.\n");
+
+	return err;
+}
+
+static int __devexit atmel_sha_remove(struct platform_device *pdev)
+{
+	static struct atmel_sha_dev *sha_dd;
+
+	sha_dd = platform_get_drvdata(pdev);
+	if (!sha_dd)
+		return -ENODEV;
+	spin_lock(&atmel_sha.lock);
+	list_del(&sha_dd->list);
+	spin_unlock(&atmel_sha.lock);
+
+	atmel_sha_unregister_algs(sha_dd);
+
+	tasklet_kill(&sha_dd->done_task);
+
+	iounmap(sha_dd->io_base);
+
+	clk_put(sha_dd->iclk);
+
+	if (sha_dd->irq >= 0)
+		free_irq(sha_dd->irq, sha_dd);
+
+	kfree(sha_dd);
+	sha_dd = NULL;
+
+	return 0;
+}
+
+static struct platform_driver atmel_sha_driver = {
+	.probe		= atmel_sha_probe,
+	.remove		= __devexit_p(atmel_sha_remove),
+	.driver		= {
+		.name	= "atmel_sha",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_platform_driver(atmel_sha_driver);
+
+MODULE_DESCRIPTION("Atmel SHA1/SHA256 hw acceleration support.");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique");
-- 
1.7.7.6


  parent reply	other threads:[~2012-07-01 17:19 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-07-01 17:19 [PATCH 0/5] AT91SAM9G46/M11: add crypto drivers Eric Bénard
2012-07-01 17:19 ` Eric Bénard
2012-07-01 17:19 ` Eric Bénard
2012-07-01 17:19 ` [PATCH 1/5] ARM: AT91SAM9G45: add crypto peripherals Eric Bénard
2012-07-01 17:19   ` Eric Bénard
2012-07-01 17:19   ` Eric Bénard
2012-07-01 17:19 ` [PATCH 2/5] crypto: add Atmel AES driver Eric Bénard
2012-07-01 17:19   ` Eric Bénard
2012-07-01 17:19   ` Eric Bénard
2012-07-06 12:17   ` Jean-Christophe PLAGNIOL-VILLARD
2012-07-06 12:17     ` Jean-Christophe PLAGNIOL-VILLARD
2012-07-06 12:17     ` Jean-Christophe PLAGNIOL-VILLARD
2012-07-06 13:25     ` Eric Bénard
2012-07-06 13:25       ` Eric Bénard
2012-07-06 13:25       ` Eric Bénard
2012-07-01 17:19 ` [PATCH 3/5] crypto: add Atmel DES/TDES driver Eric Bénard
2012-07-01 17:19   ` Eric Bénard
2012-07-01 17:19   ` Eric Bénard
2012-07-01 17:19 ` Eric Bénard [this message]
2012-07-01 17:19   ` [PATCH 4/5] crypto: add Atmel SHA1/SHA256 driver Eric Bénard
2012-07-01 17:19   ` Eric Bénard
2012-07-01 17:19 ` [PATCH 5/5] crypto: add new tests to tcrypt Eric Bénard
2012-07-01 17:19   ` Eric Bénard
2012-07-01 17:19   ` Eric Bénard
2012-07-11  3:25 ` [PATCH 0/5] AT91SAM9G46/M11: add crypto drivers Herbert Xu
2012-07-11  3:25   ` Herbert Xu
2012-07-11  3:25   ` Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1341163187-14946-5-git-send-email-eric@eukrea.com \
    --to=eric@eukrea.com \
    --cc=davem@davemloft.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nicolas.ferre@atmel.com \
    --cc=nicolas@eukrea.com \
    --cc=plagnioj@jcrosoft.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.