Linux cryptographic layer development
 help / color / mirror / Atom feed
* [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
@ 2007-05-22 12:58 Evgeniy Polyakov
  2007-05-22 15:19 ` Sebastian Siewior
  2007-05-25  8:14 ` Herbert Xu
  0 siblings, 2 replies; 15+ messages in thread
From: Evgeniy Polyakov @ 2007-05-22 12:58 UTC (permalink / raw)
  To: linux-crypto

Hi.

This is preliminary driver for HIFN 795x crypto accelerator chip.

It is slightly restructured acrypto driver, which worked correctly, but
this one was not tested with real hardware (yet, I will do it as soon as
get back my soekris adapter).
Likely it is not even a request for testing, since I see at least one
problem with current approach: what to do when crypto hardware queue is
full and no new packets can arrive? Current code just returns an error
-EINVAL if there is real error and -EBUSY if queue is full.
Due to problems with interrupt storms and possible adapter freeze
(sorry, but HIFN spec I have really sucks, so likely it is programming
error, but who knows) I added special watchdog, which fires if after
predefined timeout sessions which are supposed to be completed are not.
In that case callback is invoked with -EBUSY error.
Neither implementation I checked (OpenBSD, Linux OCF) does not support it,
but it helped me greatly in acrypto days.
I'm asking for special check for new cryptoapi async binding in this
driver, likely I did something wrong to support asynchronous processing.
80-chars per line is in TODO list for sure.

This driver supports old-style crypto_alg with "aes" string only, and I
would like to rise a discussion of the needs to support several
structures for cbc(aes), ecb(aes) and so on, since some hardware
supports plenty of modes, and allocating set of structures for each
hardware adapter found in the system would be an overkill.

Current driver only supports AES ECB encrypt/decrypt, since I do not 
know how to detect operation mode in runtime (a question).
Another issue unknown issue is a possibility to call setkey() the same
time encrypt/decrypt is called. As far as I can see it can not be done,
but I may be wrong, if so, small changes are needed in hifn_setkey
(mainly operation must be done under dev->lock).

Patch is a bit big and contains quite a few debugging craps, but it is
first revision.

I want to give a credit for most of register definitions to OpenBSD OCF.

Thanks.

Signed-off-by: Evgeniy Polyakov <johnpol@2ka.mipt.ru>

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index ff8c4be..9d2fffd 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -66,4 +66,16 @@ config CRYPTO_DEV_GEODE
 	  To compile this driver as a module, choose M here: the module
 	  will be called geode-aes.
 
+config CRYPTO_DEV_HIFN_795x
+	tristate "Support for HIFN 795x crypto processors"
+	depends on CRYPTO && PCI
+	select CRYPTO_ALGAPI
+	select CRYPTO_BLKCIPHER
+	help
+	  Say 'Y' here to enable support for HIFN 795x crypto processor for
+	  various crypto algorithms this processor supports.
+	  
+	  To compile this driver as a module, choose M here: the module
+	  will be called hifn795x.
+
 endmenu
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 6059cf8..f8c1af8 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_CRYPTO_DEV_PADLOCK) += padlock.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
 obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
+obj-$(CRYPTO_DEV_HIFN_795x) += hifn_795x.o
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
new file mode 100644
index 0000000..1c07138
--- /dev/null
+++ b/drivers/crypto/hifn_795x.c
@@ -0,0 +1,1900 @@
+/*
+ * 2007+ Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * All rights reserved.
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/crypto.h>
+
+#include <crypto/algapi.h>
+
+#include <asm/kmap_types.h>
+
+#undef dprintk
+
+#define HIFN_TEST
+#define HIFN_DEBUG
+
+#ifdef HIFN_DEBUG
+#define dprintk(f, a...) 	printk(f, ##a)
+#else
+#define dprintk(f, a...)	do {} while (0)
+#endif
+
+static int hifn_dev_number;
+
+#define ACRYPTO_OP_DECRYPT	0
+#define ACRYPTO_OP_ENCRYPT	1
+#define ACRYPTO_OP_HMAC		2
+#define ACRYPTO_OP_RNG		3
+
+#define ACRYPTO_MODE_ECB		0
+#define ACRYPTO_MODE_CBC		1
+#define ACRYPTO_MODE_CFB		2
+#define ACRYPTO_MODE_OFB		3
+#define ACRYPTO_MODE_CTR		4
+
+#define ACRYPTO_TYPE_AES_128	0
+#define ACRYPTO_TYPE_AES_192	1
+#define ACRYPTO_TYPE_AES_256	2
+#define ACRYPTO_TYPE_3DES	3
+
+#define PCI_VENDOR_ID_HIFN		0x13A3
+#define PCI_DEVICE_ID_HIFN_7955		0x0020
+#define	PCI_DEVICE_ID_HIFN_7956		0x001d
+
+/* I/O region sizes */
+
+#define HIFN_BAR0_SIZE			0x1000
+#define HIFN_BAR1_SIZE			0x2000
+#define HIFN_BAR2_SIZE			0x8000
+
+/* DMA registres */
+
+#define HIFN_DMA_CRA 			0x0C	/* DMA Command Ring Address */
+#define HIFN_DMA_SDRA 			0x1C	/* DMA Source Data Ring Address */
+#define HIFN_DMA_RRA			0x2C	/* DMA Result Ring Address */
+#define HIFN_DMA_DDRA			0x3C	/* DMA Destination Data Ring Address */
+#define HIFN_DMA_STCTL			0x40	/* DMA Status and Control */
+#define HIFN_DMA_INTREN 		0x44	/* DMA Interrupt Enable */
+#define HIFN_DMA_CFG1			0x48	/* DMA Configuration #1 */
+#define HIFN_DMA_CFG2			0x6C	/* DMA Configuration #2 */
+#define HIFN_CHIP_ID			0x98	/* Chip ID */
+
+/*
+ * Processing Unit Registers (offset from BASEREG0)
+ */
+#define	HIFN_0_PUDATA		0x00	/* Processing Unit Data */
+#define	HIFN_0_PUCTRL		0x04	/* Processing Unit Control */
+#define	HIFN_0_PUISR		0x08	/* Processing Unit Interrupt Status */
+#define	HIFN_0_PUCNFG		0x0c	/* Processing Unit Configuration */
+#define	HIFN_0_PUIER		0x10	/* Processing Unit Interrupt Enable */
+#define	HIFN_0_PUSTAT		0x14	/* Processing Unit Status/Chip ID */
+#define	HIFN_0_FIFOSTAT		0x18	/* FIFO Status */
+#define	HIFN_0_FIFOCNFG		0x1c	/* FIFO Configuration */
+#define	HIFN_0_SPACESIZE	0x20	/* Register space size */
+
+/* Processing Unit Control Register (HIFN_0_PUCTRL) */
+#define	HIFN_PUCTRL_CLRSRCFIFO	0x0010	/* clear source fifo */
+#define	HIFN_PUCTRL_STOP	0x0008	/* stop pu */
+#define	HIFN_PUCTRL_LOCKRAM	0x0004	/* lock ram */
+#define	HIFN_PUCTRL_DMAENA	0x0002	/* enable dma */
+#define	HIFN_PUCTRL_RESET	0x0001	/* Reset processing unit */
+
+/* Processing Unit Interrupt Status Register (HIFN_0_PUISR) */
+#define	HIFN_PUISR_CMDINVAL	0x8000	/* Invalid command interrupt */
+#define	HIFN_PUISR_DATAERR	0x4000	/* Data error interrupt */
+#define	HIFN_PUISR_SRCFIFO	0x2000	/* Source FIFO ready interrupt */
+#define	HIFN_PUISR_DSTFIFO	0x1000	/* Destination FIFO ready interrupt */
+#define	HIFN_PUISR_DSTOVER	0x0200	/* Destination overrun interrupt */
+#define	HIFN_PUISR_SRCCMD	0x0080	/* Source command interrupt */
+#define	HIFN_PUISR_SRCCTX	0x0040	/* Source context interrupt */
+#define	HIFN_PUISR_SRCDATA	0x0020	/* Source data interrupt */
+#define	HIFN_PUISR_DSTDATA	0x0010	/* Destination data interrupt */
+#define	HIFN_PUISR_DSTRESULT	0x0004	/* Destination result interrupt */
+
+/* Processing Unit Configuration Register (HIFN_0_PUCNFG) */
+#define	HIFN_PUCNFG_DRAMMASK	0xe000	/* DRAM size mask */
+#define	HIFN_PUCNFG_DSZ_256K	0x0000	/* 256k dram */
+#define	HIFN_PUCNFG_DSZ_512K	0x2000	/* 512k dram */
+#define	HIFN_PUCNFG_DSZ_1M	0x4000	/* 1m dram */
+#define	HIFN_PUCNFG_DSZ_2M	0x6000	/* 2m dram */
+#define	HIFN_PUCNFG_DSZ_4M	0x8000	/* 4m dram */
+#define	HIFN_PUCNFG_DSZ_8M	0xa000	/* 8m dram */
+#define	HIFN_PUNCFG_DSZ_16M	0xc000	/* 16m dram */
+#define	HIFN_PUCNFG_DSZ_32M	0xe000	/* 32m dram */
+#define	HIFN_PUCNFG_DRAMREFRESH	0x1800	/* DRAM refresh rate mask */
+#define	HIFN_PUCNFG_DRFR_512	0x0000	/* 512 divisor of ECLK */
+#define	HIFN_PUCNFG_DRFR_256	0x0800	/* 256 divisor of ECLK */
+#define	HIFN_PUCNFG_DRFR_128	0x1000	/* 128 divisor of ECLK */
+#define	HIFN_PUCNFG_TCALLPHASES	0x0200	/* your guess is as good as mine... */
+#define	HIFN_PUCNFG_TCDRVTOTEM	0x0100	/* your guess is as good as mine... */
+#define	HIFN_PUCNFG_BIGENDIAN	0x0080	/* DMA big endian mode */
+#define	HIFN_PUCNFG_BUS32	0x0040	/* Bus width 32bits */
+#define	HIFN_PUCNFG_BUS16	0x0000	/* Bus width 16 bits */
+#define	HIFN_PUCNFG_CHIPID	0x0020	/* Allow chipid from PUSTAT */
+#define	HIFN_PUCNFG_DRAM	0x0010	/* Context RAM is DRAM */
+#define	HIFN_PUCNFG_SRAM	0x0000	/* Context RAM is SRAM */
+#define	HIFN_PUCNFG_COMPSING	0x0004	/* Enable single compression context */
+#define	HIFN_PUCNFG_ENCCNFG	0x0002	/* Encryption configuration */
+
+/* Processing Unit Interrupt Enable Register (HIFN_0_PUIER) */
+#define	HIFN_PUIER_CMDINVAL	0x8000	/* Invalid command interrupt */
+#define	HIFN_PUIER_DATAERR	0x4000	/* Data error interrupt */
+#define	HIFN_PUIER_SRCFIFO	0x2000	/* Source FIFO ready interrupt */
+#define	HIFN_PUIER_DSTFIFO	0x1000	/* Destination FIFO ready interrupt */
+#define	HIFN_PUIER_DSTOVER	0x0200	/* Destination overrun interrupt */
+#define	HIFN_PUIER_SRCCMD	0x0080	/* Source command interrupt */
+#define	HIFN_PUIER_SRCCTX	0x0040	/* Source context interrupt */
+#define	HIFN_PUIER_SRCDATA	0x0020	/* Source data interrupt */
+#define	HIFN_PUIER_DSTDATA	0x0010	/* Destination data interrupt */
+#define	HIFN_PUIER_DSTRESULT	0x0004	/* Destination result interrupt */
+
+/* Processing Unit Status Register/Chip ID (HIFN_0_PUSTAT) */
+#define	HIFN_PUSTAT_CMDINVAL	0x8000	/* Invalid command interrupt */
+#define	HIFN_PUSTAT_DATAERR	0x4000	/* Data error interrupt */
+#define	HIFN_PUSTAT_SRCFIFO	0x2000	/* Source FIFO ready interrupt */
+#define	HIFN_PUSTAT_DSTFIFO	0x1000	/* Destination FIFO ready interrupt */
+#define	HIFN_PUSTAT_DSTOVER	0x0200	/* Destination overrun interrupt */
+#define	HIFN_PUSTAT_SRCCMD	0x0080	/* Source command interrupt */
+#define	HIFN_PUSTAT_SRCCTX	0x0040	/* Source context interrupt */
+#define	HIFN_PUSTAT_SRCDATA	0x0020	/* Source data interrupt */
+#define	HIFN_PUSTAT_DSTDATA	0x0010	/* Destination data interrupt */
+#define	HIFN_PUSTAT_DSTRESULT	0x0004	/* Destination result interrupt */
+#define	HIFN_PUSTAT_CHIPREV	0x00ff	/* Chip revision mask */
+#define	HIFN_PUSTAT_CHIPENA	0xff00	/* Chip enabled mask */
+#define	HIFN_PUSTAT_ENA_2	0x1100	/* Level 2 enabled */
+#define	HIFN_PUSTAT_ENA_1	0x1000	/* Level 1 enabled */
+#define	HIFN_PUSTAT_ENA_0	0x3000	/* Level 0 enabled */
+#define	HIFN_PUSTAT_REV_2	0x0020	/* 7751 PT6/2 */
+#define	HIFN_PUSTAT_REV_3	0x0030	/* 7751 PT6/3 */
+
+/* FIFO Status Register (HIFN_0_FIFOSTAT) */
+#define	HIFN_FIFOSTAT_SRC	0x7f00	/* Source FIFO available */
+#define	HIFN_FIFOSTAT_DST	0x007f	/* Destination FIFO available */
+
+/* FIFO Configuration Register (HIFN_0_FIFOCNFG) */
+#define	HIFN_FIFOCNFG_THRESHOLD	0x0400	/* must be written as 1 */
+
+/*
+ * DMA Interface Registers (offset from BASEREG1)
+ */
+#define	HIFN_1_DMA_CRAR		0x0c	/* DMA Command Ring Address */
+#define	HIFN_1_DMA_SRAR		0x1c	/* DMA Source Ring Address */
+#define	HIFN_1_DMA_RRAR		0x2c	/* DMA Result Ring Address */
+#define	HIFN_1_DMA_DRAR		0x3c	/* DMA Destination Ring Address */
+#define	HIFN_1_DMA_CSR		0x40	/* DMA Status and Control */
+#define	HIFN_1_DMA_IER		0x44	/* DMA Interrupt Enable */
+#define	HIFN_1_DMA_CNFG		0x48	/* DMA Configuration */
+#define	HIFN_1_PLL		0x4c	/* 795x: PLL config */
+#define	HIFN_1_7811_RNGENA	0x60	/* 7811: rng enable */
+#define	HIFN_1_7811_RNGCFG	0x64	/* 7811: rng config */
+#define	HIFN_1_7811_RNGDAT	0x68	/* 7811: rng data */
+#define	HIFN_1_7811_RNGSTS	0x6c	/* 7811: rng status */
+#define	HIFN_1_7811_MIPSRST	0x94	/* 7811: MIPS reset */
+#define	HIFN_1_REVID		0x98	/* Revision ID */
+#define	HIFN_1_UNLOCK_SECRET1	0xf4
+#define	HIFN_1_UNLOCK_SECRET2	0xfc
+#define	HIFN_1_PUB_RESET	0x204	/* Public/RNG Reset */
+#define	HIFN_1_PUB_BASE		0x300	/* Public Base Address */
+#define	HIFN_1_PUB_OPLEN	0x304	/* Public Operand Length */
+#define	HIFN_1_PUB_OP		0x308	/* Public Operand */
+#define	HIFN_1_PUB_STATUS	0x30c	/* Public Status */
+#define	HIFN_1_PUB_IEN		0x310	/* Public Interrupt enable */
+#define	HIFN_1_RNG_CONFIG	0x314	/* RNG config */
+#define	HIFN_1_RNG_DATA		0x318	/* RNG data */
+#define	HIFN_1_PUB_MEM		0x400	/* start of Public key memory */
+#define	HIFN_1_PUB_MEMEND	0xbff	/* end of Public key memory */
+
+/* DMA Status and Control Register (HIFN_1_DMA_CSR) */
+#define	HIFN_DMACSR_D_CTRLMASK	0xc0000000	/* Destinition Ring Control */
+#define	HIFN_DMACSR_D_CTRL_NOP	0x00000000	/* Dest. Control: no-op */
+#define	HIFN_DMACSR_D_CTRL_DIS	0x40000000	/* Dest. Control: disable */
+#define	HIFN_DMACSR_D_CTRL_ENA	0x80000000	/* Dest. Control: enable */
+#define	HIFN_DMACSR_D_ABORT	0x20000000	/* Destinition Ring PCIAbort */
+#define	HIFN_DMACSR_D_DONE	0x10000000	/* Destinition Ring Done */
+#define	HIFN_DMACSR_D_LAST	0x08000000	/* Destinition Ring Last */
+#define	HIFN_DMACSR_D_WAIT	0x04000000	/* Destinition Ring Waiting */
+#define	HIFN_DMACSR_D_OVER	0x02000000	/* Destinition Ring Overflow */
+#define	HIFN_DMACSR_R_CTRL	0x00c00000	/* Result Ring Control */
+#define	HIFN_DMACSR_R_CTRL_NOP	0x00000000	/* Result Control: no-op */
+#define	HIFN_DMACSR_R_CTRL_DIS	0x00400000	/* Result Control: disable */
+#define	HIFN_DMACSR_R_CTRL_ENA	0x00800000	/* Result Control: enable */
+#define	HIFN_DMACSR_R_ABORT	0x00200000	/* Result Ring PCI Abort */
+#define	HIFN_DMACSR_R_DONE	0x00100000	/* Result Ring Done */
+#define	HIFN_DMACSR_R_LAST	0x00080000	/* Result Ring Last */
+#define	HIFN_DMACSR_R_WAIT	0x00040000	/* Result Ring Waiting */
+#define	HIFN_DMACSR_R_OVER	0x00020000	/* Result Ring Overflow */
+#define	HIFN_DMACSR_S_CTRL	0x0000c000	/* Source Ring Control */
+#define	HIFN_DMACSR_S_CTRL_NOP	0x00000000	/* Source Control: no-op */
+#define	HIFN_DMACSR_S_CTRL_DIS	0x00004000	/* Source Control: disable */
+#define	HIFN_DMACSR_S_CTRL_ENA	0x00008000	/* Source Control: enable */
+#define	HIFN_DMACSR_S_ABORT	0x00002000	/* Source Ring PCI Abort */
+#define	HIFN_DMACSR_S_DONE	0x00001000	/* Source Ring Done */
+#define	HIFN_DMACSR_S_LAST	0x00000800	/* Source Ring Last */
+#define	HIFN_DMACSR_S_WAIT	0x00000400	/* Source Ring Waiting */
+#define	HIFN_DMACSR_ILLW	0x00000200	/* Illegal write (7811 only) */
+#define	HIFN_DMACSR_ILLR	0x00000100	/* Illegal read (7811 only) */
+#define	HIFN_DMACSR_C_CTRL	0x000000c0	/* Command Ring Control */
+#define	HIFN_DMACSR_C_CTRL_NOP	0x00000000	/* Command Control: no-op */
+#define	HIFN_DMACSR_C_CTRL_DIS	0x00000040	/* Command Control: disable */
+#define	HIFN_DMACSR_C_CTRL_ENA	0x00000080	/* Command Control: enable */
+#define	HIFN_DMACSR_C_ABORT	0x00000020	/* Command Ring PCI Abort */
+#define	HIFN_DMACSR_C_DONE	0x00000010	/* Command Ring Done */
+#define	HIFN_DMACSR_C_LAST	0x00000008	/* Command Ring Last */
+#define	HIFN_DMACSR_C_WAIT	0x00000004	/* Command Ring Waiting */
+#define	HIFN_DMACSR_PUBDONE	0x00000002	/* Public op done (7951 only) */
+#define	HIFN_DMACSR_ENGINE	0x00000001	/* Command Ring Engine IRQ */
+
+/* DMA Interrupt Enable Register (HIFN_1_DMA_IER) */
+#define	HIFN_DMAIER_D_ABORT	0x20000000	/* Destination Ring PCIAbort */
+#define	HIFN_DMAIER_D_DONE	0x10000000	/* Destination Ring Done */
+#define	HIFN_DMAIER_D_LAST	0x08000000	/* Destination Ring Last */
+#define	HIFN_DMAIER_D_WAIT	0x04000000	/* Destination Ring Waiting */
+#define	HIFN_DMAIER_D_OVER	0x02000000	/* Destination Ring Overflow */
+#define	HIFN_DMAIER_R_ABORT	0x00200000	/* Result Ring PCI Abort */
+#define	HIFN_DMAIER_R_DONE	0x00100000	/* Result Ring Done */
+#define	HIFN_DMAIER_R_LAST	0x00080000	/* Result Ring Last */
+#define	HIFN_DMAIER_R_WAIT	0x00040000	/* Result Ring Waiting */
+#define	HIFN_DMAIER_R_OVER	0x00020000	/* Result Ring Overflow */
+#define	HIFN_DMAIER_S_ABORT	0x00002000	/* Source Ring PCI Abort */
+#define	HIFN_DMAIER_S_DONE	0x00001000	/* Source Ring Done */
+#define	HIFN_DMAIER_S_LAST	0x00000800	/* Source Ring Last */
+#define	HIFN_DMAIER_S_WAIT	0x00000400	/* Source Ring Waiting */
+#define	HIFN_DMAIER_ILLW	0x00000200	/* Illegal write (7811 only) */
+#define	HIFN_DMAIER_ILLR	0x00000100	/* Illegal read (7811 only) */
+#define	HIFN_DMAIER_C_ABORT	0x00000020	/* Command Ring PCI Abort */
+#define	HIFN_DMAIER_C_DONE	0x00000010	/* Command Ring Done */
+#define	HIFN_DMAIER_C_LAST	0x00000008	/* Command Ring Last */
+#define	HIFN_DMAIER_C_WAIT	0x00000004	/* Command Ring Waiting */
+#define	HIFN_DMAIER_PUBDONE	0x00000002	/* public op done (7951 only) */
+#define	HIFN_DMAIER_ENGINE	0x00000001	/* Engine IRQ */
+
+/* DMA Configuration Register (HIFN_1_DMA_CNFG) */
+#define	HIFN_DMACNFG_BIGENDIAN	0x10000000	/* big endian mode */
+#define	HIFN_DMACNFG_POLLFREQ	0x00ff0000	/* Poll frequency mask */
+#define	HIFN_DMACNFG_UNLOCK	0x00000800
+#define	HIFN_DMACNFG_POLLINVAL	0x00000700	/* Invalid Poll Scalar */
+#define	HIFN_DMACNFG_LAST	0x00000010	/* Host control LAST bit */
+#define	HIFN_DMACNFG_MODE	0x00000004	/* DMA mode */
+#define	HIFN_DMACNFG_DMARESET	0x00000002	/* DMA Reset # */
+#define	HIFN_DMACNFG_MSTRESET	0x00000001	/* Master Reset # */
+
+#define	HIFN_PLL_7956		0x00001d18	/* 7956 PLL config value */
+
+/* Public key reset register (HIFN_1_PUB_RESET) */
+#define	HIFN_PUBRST_RESET	0x00000001	/* reset public/rng unit */
+
+/* Public base address register (HIFN_1_PUB_BASE) */
+#define	HIFN_PUBBASE_ADDR	0x00003fff	/* base address */
+
+/* Public operand length register (HIFN_1_PUB_OPLEN) */
+#define	HIFN_PUBOPLEN_MOD_M	0x0000007f	/* modulus length mask */
+#define	HIFN_PUBOPLEN_MOD_S	0		/* modulus length shift */
+#define	HIFN_PUBOPLEN_EXP_M	0x0003ff80	/* exponent length mask */
+#define	HIFN_PUBOPLEN_EXP_S	7		/* exponent lenght shift */
+#define	HIFN_PUBOPLEN_RED_M	0x003c0000	/* reducend length mask */
+#define	HIFN_PUBOPLEN_RED_S	18		/* reducend length shift */
+
+/* Public operation register (HIFN_1_PUB_OP) */
+#define	HIFN_PUBOP_AOFFSET_M	0x0000007f	/* A offset mask */
+#define	HIFN_PUBOP_AOFFSET_S	0		/* A offset shift */
+#define	HIFN_PUBOP_BOFFSET_M	0x00000f80	/* B offset mask */
+#define	HIFN_PUBOP_BOFFSET_S	7		/* B offset shift */
+#define	HIFN_PUBOP_MOFFSET_M	0x0003f000	/* M offset mask */
+#define	HIFN_PUBOP_MOFFSET_S	12		/* M offset shift */
+#define	HIFN_PUBOP_OP_MASK	0x003c0000	/* Opcode: */
+#define	HIFN_PUBOP_OP_NOP	0x00000000	/*  NOP */
+#define	HIFN_PUBOP_OP_ADD	0x00040000	/*  ADD */
+#define	HIFN_PUBOP_OP_ADDC	0x00080000	/*  ADD w/carry */
+#define	HIFN_PUBOP_OP_SUB	0x000c0000	/*  SUB */
+#define	HIFN_PUBOP_OP_SUBC	0x00100000	/*  SUB w/carry */
+#define	HIFN_PUBOP_OP_MODADD	0x00140000	/*  Modular ADD */
+#define	HIFN_PUBOP_OP_MODSUB	0x00180000	/*  Modular SUB */
+#define	HIFN_PUBOP_OP_INCA	0x001c0000	/*  INC A */
+#define	HIFN_PUBOP_OP_DECA	0x00200000	/*  DEC A */
+#define	HIFN_PUBOP_OP_MULT	0x00240000	/*  MULT */
+#define	HIFN_PUBOP_OP_MODMULT	0x00280000	/*  Modular MULT */
+#define	HIFN_PUBOP_OP_MODRED	0x002c0000	/*  Modular RED */
+#define	HIFN_PUBOP_OP_MODEXP	0x00300000	/*  Modular EXP */
+
+/* Public status register (HIFN_1_PUB_STATUS) */
+#define	HIFN_PUBSTS_DONE	0x00000001	/* operation done */
+#define	HIFN_PUBSTS_CARRY	0x00000002	/* carry */
+
+/* Public interrupt enable register (HIFN_1_PUB_IEN) */
+#define	HIFN_PUBIEN_DONE	0x00000001	/* operation done interrupt */
+
+/* Random number generator config register (HIFN_1_RNG_CONFIG) */
+#define	HIFN_RNGCFG_ENA		0x00000001	/* enable rng */
+
+
+#define HIFN_NAMESIZE			32
+#define HIFN_MAX_RESULT_ORDER		5
+
+#define	HIFN_D_CMD_RSIZE		24*4
+#define	HIFN_D_SRC_RSIZE		80*4
+#define	HIFN_D_DST_RSIZE		80*4
+#define	HIFN_D_RES_RSIZE		24*4
+
+#define HIFN_QUEUE_LENGTH		HIFN_D_CMD_RSIZE-5
+
+#define HIFN_DES_KEY_LENGTH		8
+#define HIFN_3DES_KEY_LENGTH		24
+#define HIFN_MAX_CRYPT_KEY_LENGTH	HIFN_3DES_KEY_LENGTH
+#define HIFN_IV_LENGTH			8
+#define HIFN_AES_IV_LENGTH		16
+#define	HIFN_MAX_IV_LENGTH		HIFN_AES_IV_LENGTH
+
+#define HIFN_MAC_KEY_LENGTH		64
+#define HIFN_MD5_LENGTH			16
+#define HIFN_SHA1_LENGTH		20
+#define HIFN_MAC_TRUNC_LENGTH		12
+
+#define	HIFN_MAX_COMMAND	(8 + 8 + 8 + 64 + 260)
+#define	HIFN_MAX_RESULT		(8 + 4 + 4 + 20 + 4)
+
+struct hifn_desc
+{
+	volatile u32		l;
+	volatile u32		p;
+};
+
+struct hifn_dma {
+	struct hifn_desc	cmdr[HIFN_D_CMD_RSIZE+1];
+	struct hifn_desc	srcr[HIFN_D_SRC_RSIZE+1];
+	struct hifn_desc	dstr[HIFN_D_DST_RSIZE+1];
+	struct hifn_desc	resr[HIFN_D_RES_RSIZE+1];
+
+	u8			command_bufs[HIFN_D_CMD_RSIZE][HIFN_MAX_COMMAND];
+	u8			result_bufs[HIFN_D_CMD_RSIZE][HIFN_MAX_RESULT];
+
+	u64			test_src, test_dst;
+
+	/*
+	 *  Our current positions for insertion and removal from the descriptor
+	 *  rings. 
+	 */
+	volatile int		cmdi, srci, dsti, resi;
+	volatile int		cmdu, srcu, dstu, resu;
+	int			cmdk, srck, dstk, resk;
+};
+
+#define HIFN_FLAG_CMD_BUSY	(1<<0)
+#define HIFN_FLAG_SRC_BUSY	(1<<1)
+#define HIFN_FLAG_DST_BUSY	(1<<2)
+#define HIFN_FLAG_RES_BUSY	(1<<3)
+#define HIFN_FLAG_OLD_KEY	(1<<4)
+
+struct hifn_device
+{
+	char			name[HIFN_NAMESIZE];
+
+	int			irq;
+	
+	struct pci_dev		*pdev;
+	void __iomem		*bar[3];
+	
+	unsigned long		result_mem;
+	dma_addr_t		dst;
+
+	void			*desc_virt;
+	dma_addr_t		desc_dma;
+
+	u32			dmareg;
+
+	void 			*sa[HIFN_D_RES_RSIZE];
+
+	u32			ram_size;
+	u32			max_sessions;
+
+	spinlock_t		lock;
+	
+	void 			*priv;
+
+	u32			flags;
+	int			active;
+	struct delayed_work	work;
+	unsigned long		waiting;
+	unsigned long		reset;
+	unsigned long		intr;
+	unsigned long		success;
+	unsigned long		prev_success;
+	unsigned long		dequeue_failed;
+	unsigned long		break_session;
+
+	int			started;
+
+	u8			snum;
+	u8			current_key[HIFN_MAX_CRYPT_KEY_LENGTH];
+	int			current_key_len;
+	
+	struct crypto_alg	*alg;
+};
+
+#define	HIFN_D_LENGTH			0x0000ffff
+#define	HIFN_D_NOINVALID		0x01000000
+#define	HIFN_D_MASKDONEIRQ		0x02000000
+#define	HIFN_D_DESTOVER			0x04000000
+#define	HIFN_D_OVER			0x08000000
+#define	HIFN_D_LAST			0x20000000
+#define	HIFN_D_JUMP			0x40000000
+#define	HIFN_D_VALID			0x80000000
+
+struct hifn_base_command
+{
+	volatile u16		masks;
+	volatile u16		session_num;
+	volatile u16		total_source_count;
+	volatile u16		total_dest_count;
+};
+
+#define	HIFN_BASE_CMD_COMP		0x0100	/* enable compression engine */
+#define	HIFN_BASE_CMD_PAD		0x0200	/* enable padding engine */
+#define	HIFN_BASE_CMD_MAC		0x0400	/* enable MAC engine */
+#define	HIFN_BASE_CMD_CRYPT		0x0800	/* enable crypt engine */
+#define	HIFN_BASE_CMD_DECODE		0x2000
+#define	HIFN_BASE_CMD_SRCLEN_M		0xc000
+#define	HIFN_BASE_CMD_SRCLEN_S		14
+#define	HIFN_BASE_CMD_DSTLEN_M		0x3000
+#define	HIFN_BASE_CMD_DSTLEN_S		12
+#define	HIFN_BASE_CMD_LENMASK_HI	0x30000
+#define	HIFN_BASE_CMD_LENMASK_LO	0x0ffff
+
+/*
+ * Structure to help build up the command data structure.
+ */
+struct hifn_crypt_command 
+{
+	volatile u16 		masks;
+	volatile u16 		header_skip;
+	volatile u16 		source_count;
+	volatile u16 		reserved;
+};
+
+#define	HIFN_CRYPT_CMD_ALG_MASK		0x0003		/* algorithm: */
+#define	HIFN_CRYPT_CMD_ALG_DES		0x0000		/*   DES */
+#define	HIFN_CRYPT_CMD_ALG_3DES		0x0001		/*   3DES */
+#define	HIFN_CRYPT_CMD_ALG_RC4		0x0002		/*   RC4 */
+#define	HIFN_CRYPT_CMD_ALG_AES		0x0003		/*   AES */
+#define	HIFN_CRYPT_CMD_MODE_MASK	0x0018		/* Encrypt mode: */
+#define	HIFN_CRYPT_CMD_MODE_ECB		0x0000		/*   ECB */
+#define	HIFN_CRYPT_CMD_MODE_CBC		0x0008		/*   CBC */
+#define	HIFN_CRYPT_CMD_MODE_CFB		0x0010		/*   CFB */
+#define	HIFN_CRYPT_CMD_MODE_OFB		0x0018		/*   OFB */
+#define	HIFN_CRYPT_CMD_CLR_CTX		0x0040		/* clear context */
+#define	HIFN_CRYPT_CMD_KSZ_MASK		0x0600		/* AES key size: */
+#define	HIFN_CRYPT_CMD_KSZ_128		0x0000		/*  128 bit */
+#define	HIFN_CRYPT_CMD_KSZ_192		0x0200		/*  192 bit */
+#define	HIFN_CRYPT_CMD_KSZ_256		0x0400		/*  256 bit */
+#define	HIFN_CRYPT_CMD_NEW_KEY		0x0800		/* expect new key */
+#define	HIFN_CRYPT_CMD_NEW_IV		0x1000		/* expect new iv */
+#define	HIFN_CRYPT_CMD_SRCLEN_M		0xc000
+#define	HIFN_CRYPT_CMD_SRCLEN_S		14
+
+/*
+ * Structure to help build up the command data structure.
+ */
+struct hifn_mac_command 
+{
+	volatile u16 		masks;
+	volatile u16 		header_skip;
+	volatile u16 		source_count;
+	volatile u16 		reserved;
+};
+
+#define	HIFN_MAC_CMD_ALG_MASK		0x0001
+#define	HIFN_MAC_CMD_ALG_SHA1		0x0000
+#define	HIFN_MAC_CMD_ALG_MD5		0x0001
+#define	HIFN_MAC_CMD_MODE_MASK		0x000c
+#define	HIFN_MAC_CMD_MODE_HMAC		0x0000
+#define	HIFN_MAC_CMD_MODE_SSL_MAC	0x0004
+#define	HIFN_MAC_CMD_MODE_HASH		0x0008
+#define	HIFN_MAC_CMD_MODE_FULL		0x0004
+#define	HIFN_MAC_CMD_TRUNC		0x0010
+#define	HIFN_MAC_CMD_RESULT		0x0020
+#define	HIFN_MAC_CMD_APPEND		0x0040
+#define	HIFN_MAC_CMD_SRCLEN_M		0xc000
+#define	HIFN_MAC_CMD_SRCLEN_S		14
+
+/*
+ * MAC POS IPsec initiates authentication after encryption on encodes
+ * and before decryption on decodes.
+ */
+#define	HIFN_MAC_CMD_POS_IPSEC		0x0200
+#define	HIFN_MAC_CMD_NEW_KEY		0x0800
+
+struct hifn_comp_command 
+{
+	volatile u16 		masks;
+	volatile u16 		header_skip;
+	volatile u16 		source_count;
+	volatile u16 		reserved;
+};
+
+#define	HIFN_COMP_CMD_SRCLEN_M		0xc000
+#define	HIFN_COMP_CMD_SRCLEN_S		14
+#define	HIFN_COMP_CMD_ONE		0x0100	/* must be one */
+#define	HIFN_COMP_CMD_CLEARHIST		0x0010	/* clear history */
+#define	HIFN_COMP_CMD_UPDATEHIST	0x0008	/* update history */
+#define	HIFN_COMP_CMD_LZS_STRIP0	0x0004	/* LZS: strip zero */
+#define	HIFN_COMP_CMD_MPPC_RESTART	0x0004	/* MPPC: restart */
+#define	HIFN_COMP_CMD_ALG_MASK		0x0001	/* compression mode: */
+#define	HIFN_COMP_CMD_ALG_MPPC		0x0001	/*   MPPC */
+#define	HIFN_COMP_CMD_ALG_LZS		0x0000	/*   LZS */
+
+struct hifn_base_result 
+{
+	volatile u16 		flags;
+	volatile u16 		session;
+	volatile u16 		src_cnt;		/* 15:0 of source count */
+	volatile u16 		dst_cnt;		/* 15:0 of dest count */
+};
+
+#define	HIFN_BASE_RES_DSTOVERRUN	0x0200	/* destination overrun */
+#define	HIFN_BASE_RES_SRCLEN_M		0xc000	/* 17:16 of source count */
+#define	HIFN_BASE_RES_SRCLEN_S		14
+#define	HIFN_BASE_RES_DSTLEN_M		0x3000	/* 17:16 of dest count */
+#define	HIFN_BASE_RES_DSTLEN_S		12
+
+struct hifn_comp_result 
+{
+	volatile u16 		flags;
+	volatile u16 		crc;
+};
+
+#define	HIFN_COMP_RES_LCB_M		0xff00	/* longitudinal check byte */
+#define	HIFN_COMP_RES_LCB_S		8
+#define	HIFN_COMP_RES_RESTART		0x0004	/* MPPC: restart */
+#define	HIFN_COMP_RES_ENDMARKER		0x0002	/* LZS: end marker seen */
+#define	HIFN_COMP_RES_SRC_NOTZERO	0x0001	/* source expired */
+
+struct hifn_mac_result 
+{
+	volatile u16 		flags;
+	volatile u16 		reserved;
+	/* followed by 0, 6, 8, or 10 u16's of the MAC, then crypt */
+};
+
+#define	HIFN_MAC_RES_MISCOMPARE		0x0002	/* compare failed */
+#define	HIFN_MAC_RES_SRC_NOTZERO	0x0001	/* source expired */
+
+struct hifn_crypt_result 
+{
+	volatile u16 		flags;
+	volatile u16 		reserved;
+};
+
+#define	HIFN_CRYPT_RES_SRC_NOTZERO	0x0001	/* source expired */
+
+#ifndef HIFN_POLL_FREQUENCY
+#define	HIFN_POLL_FREQUENCY	0x1
+#endif
+
+#ifndef HIFN_POLL_SCALAR
+#define	HIFN_POLL_SCALAR	0x0
+#endif
+
+#define	HIFN_MAX_SEGLEN 	0xffff		/* maximum dma segment len */
+#define	HIFN_MAX_DMALEN		0x3ffff		/* maximum dma length */
+
+static inline u32 hifn_read_0(struct hifn_device *dev, u32 reg)
+{
+	u32 ret;
+
+	ret = readl((char *)(dev->bar[0]) + reg);
+
+	return ret;
+}
+
+static inline u32 hifn_read_1(struct hifn_device *dev, u32 reg)
+{
+	u32 ret;
+
+	ret = readl((char *)(dev->bar[1]) + reg);
+
+	return ret;
+}
+
+static inline void hifn_write_0(struct hifn_device *dev, u32 reg, u32 val)
+{
+	writel(val, (char *)(dev->bar[0]) + reg);
+}
+
+static inline void hifn_write_1(struct hifn_device *dev, u32 reg, u32 val)
+{
+	writel(val, (char *)(dev->bar[1]) + reg);
+}
+
+#if 0
+static void hifn_dump_ring(char *str, struct hifn_device *dev)
+{
+	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+	dma_addr_t dptr = dev->desc_dma;
+
+	dprintk("%4d cmdr: %08x %d\n", 
+			dma->cmdi,
+			hifn_read_1(dev, HIFN_1_DMA_CRAR), 
+			(hifn_read_1(dev, HIFN_1_DMA_CRAR) - 
+			 	__cpu_to_le32(dptr + offsetof(struct hifn_dma, cmdr[0])))/sizeof(struct hifn_desc));
+	dprintk("%4d srcr: %08x %d\n", 
+			dma->srci,
+			hifn_read_1(dev, HIFN_1_DMA_SRAR), 
+			(hifn_read_1(dev, HIFN_1_DMA_SRAR) - 
+			 	__cpu_to_le32(dptr + offsetof(struct hifn_dma, srcr[0])))/sizeof(struct hifn_desc));
+	dprintk("%4d dstr: %08x %d\n", 
+			dma->dsti,
+			hifn_read_1(dev, HIFN_1_DMA_DRAR), 
+			(hifn_read_1(dev, HIFN_1_DMA_DRAR) - 
+			 	__cpu_to_le32(dptr + offsetof(struct hifn_dma, dstr[0])))/sizeof(struct hifn_desc));
+	dprintk("%4d resr: %08x %d\n", 
+			dma->resi,
+			hifn_read_1(dev, HIFN_1_DMA_RRAR), 
+			(hifn_read_1(dev, HIFN_1_DMA_RRAR) - 
+			 	__cpu_to_le32(dptr + offsetof(struct hifn_dma, resr[0])))/sizeof(struct hifn_desc));
+}
+#endif
+
+static void hifn_wait_puc(struct hifn_device *dev)
+{
+	int i;
+	u32 ret;
+
+	for (i=10000; i > 0; --i) {
+		ret = hifn_read_0(dev, HIFN_0_PUCTRL);
+		if (!(ret & HIFN_PUCTRL_RESET))
+			break;
+
+		udelay(1);
+	}
+
+	if (!i)
+		dprintk("%s: Failed to reset PUC unit.\n", dev->name);
+}
+
+static void hifn_reset_puc(struct hifn_device *dev)
+{
+	hifn_write_0(dev, HIFN_0_PUCTRL, HIFN_PUCTRL_DMAENA);
+	hifn_wait_puc(dev);
+}
+
+static void hifn_stop_device(struct hifn_device *dev)
+{
+	hifn_write_1(dev, HIFN_1_DMA_CSR,
+		HIFN_DMACSR_D_CTRL_DIS | HIFN_DMACSR_R_CTRL_DIS |
+		HIFN_DMACSR_S_CTRL_DIS | HIFN_DMACSR_C_CTRL_DIS);
+	hifn_write_0(dev, HIFN_0_PUIER, 0);
+	hifn_write_1(dev, HIFN_1_DMA_IER, 0);
+}
+
+static void hifn_reset_dma(struct hifn_device *dev, int full)
+{
+	hifn_stop_device(dev);
+	
+	/*
+	 * Setting poll frequency and others to 0.
+	 */
+	hifn_write_1(dev, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE);
+	mdelay(1);
+
+	/*
+	 * Reset DMA.
+	 */
+	if (full) {
+		hifn_write_1(dev, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MODE);
+		mdelay(1);
+	} else {
+		hifn_write_1(dev, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MODE | HIFN_DMACNFG_MSTRESET);
+		hifn_reset_puc(dev);
+	}
+
+	hifn_write_1(dev, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE);
+
+	hifn_reset_puc(dev);
+}
+
+static u32 hifn_next_signature(u_int32_t a, u_int cnt)
+{
+	int i;
+	u32 v;
+
+	for (i = 0; i < cnt; i++) {
+
+		/* get the parity */
+		v = a & 0x80080125;
+		v ^= v >> 16;
+		v ^= v >> 8;
+		v ^= v >> 4;
+		v ^= v >> 2;
+		v ^= v >> 1;
+
+		a = (v & 1) ^ (a << 1);
+	}
+
+	return a;
+}
+
+static struct pci2id {
+	u_short		pci_vendor;
+	u_short		pci_prod;
+	char		card_id[13];
+} pci2id[] = {
+	{
+		PCI_VENDOR_ID_HIFN,
+		PCI_DEVICE_ID_HIFN_7955,
+		{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		  0x00, 0x00, 0x00, 0x00, 0x00 }
+	}, 
+	{
+		PCI_VENDOR_ID_HIFN,
+		PCI_DEVICE_ID_HIFN_7956,
+		{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		  0x00, 0x00, 0x00, 0x00, 0x00 }
+	}
+};
+
+static int hifn_init_pubrng(struct hifn_device *dev)
+{
+	int i;
+	
+	hifn_write_1(dev, HIFN_1_PUB_RESET, hifn_read_1(dev, HIFN_1_PUB_RESET) | HIFN_PUBRST_RESET);
+
+	for (i=100; i > 0; --i) {
+		mdelay(1);
+
+		if ((hifn_read_1(dev, HIFN_1_PUB_RESET) & HIFN_PUBRST_RESET) == 0)
+			break;
+	}
+
+	if (!i)
+		dprintk("Chip %s: Failed to initialise public key engine.\n", dev->name);
+	else {
+		hifn_write_1(dev, HIFN_1_PUB_IEN, HIFN_PUBIEN_DONE);
+		dev->dmareg |= HIFN_DMAIER_PUBDONE;
+		hifn_write_1(dev, HIFN_1_DMA_IER, dev->dmareg);
+
+		dprintk("Chip %s: Public key engine has been sucessfully initialised.\n", dev->name);
+	}
+
+	/*
+	 * Enable RNG engine.
+	 */
+
+	hifn_write_1(dev, HIFN_1_RNG_CONFIG, hifn_read_1(dev, HIFN_1_RNG_CONFIG) | HIFN_RNGCFG_ENA);
+	dprintk("Chip %s: RNG engine has been successfully initialised.\n", dev->name);
+
+	return 0;
+}
+
+static int hifn_enable_crypto(struct hifn_device *dev)
+{
+	u32 dmacfg, addr;
+	char *offtbl = NULL;
+	int i;
+	
+	for (i = 0; i < sizeof(pci2id)/sizeof(pci2id[0]); i++) {
+		if (pci2id[i].pci_vendor == dev->pdev->vendor && 
+				pci2id[i].pci_prod == dev->pdev->device) {
+			offtbl = pci2id[i].card_id;
+			break;
+		}
+	}
+
+	if (offtbl == NULL) {
+		dprintk("Chip %s: Unknown card!\n", dev->name);
+		return -ENODEV;
+	}
+
+	dmacfg = hifn_read_1(dev, HIFN_1_DMA_CNFG);
+
+	hifn_write_1(dev, HIFN_1_DMA_CNFG, 
+			HIFN_DMACNFG_UNLOCK | HIFN_DMACNFG_MSTRESET | 
+			HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE);
+	mdelay(1);
+	addr = hifn_read_1(dev, HIFN_1_UNLOCK_SECRET1);
+	mdelay(1);
+	hifn_write_1(dev, HIFN_1_UNLOCK_SECRET2, 0);
+	mdelay(1);
+
+	for (i=0; i<12; ++i) {
+		addr = hifn_next_signature(addr, offtbl[i] + 0x101);
+		hifn_write_1(dev, HIFN_1_UNLOCK_SECRET2, addr);
+
+		mdelay(1);
+	}
+	hifn_write_1(dev, HIFN_1_DMA_CNFG, dmacfg);
+	
+	dprintk("Chip %s: %s.\n", dev->name, pci_name(dev->pdev));
+
+	return 0;
+}
+
+static void hifn_init_dma(struct hifn_device *dev)
+{
+	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+	u32 dptr = dev->desc_dma;
+	int i;
+
+	for (i=0; i<HIFN_D_CMD_RSIZE; ++i)
+		dma->cmdr[i].p = __cpu_to_le32(dptr + offsetof(struct hifn_dma, command_bufs[i][0]));
+	for (i=0; i<HIFN_D_RES_RSIZE; ++i)
+		dma->resr[i].p = __cpu_to_le32(dptr + offsetof(struct hifn_dma, result_bufs[i][0]));
+
+	/*
+	 * Setup LAST descriptors.
+	 */
+	dma->cmdr[HIFN_D_CMD_RSIZE].p = __cpu_to_le32(dptr + offsetof(struct hifn_dma, cmdr[0]));
+	dma->srcr[HIFN_D_SRC_RSIZE].p = __cpu_to_le32(dptr + offsetof(struct hifn_dma, srcr[0]));
+	dma->dstr[HIFN_D_DST_RSIZE].p = __cpu_to_le32(dptr + offsetof(struct hifn_dma, dstr[0]));
+	dma->resr[HIFN_D_RES_RSIZE].p = __cpu_to_le32(dptr + offsetof(struct hifn_dma, resr[0]));
+	
+	dma->cmdu = dma->srcu = dma->dstu = dma->resu = 0;
+	dma->cmdi = dma->srci = dma->dsti = dma->resi = 0;
+	dma->cmdk = dma->srck = dma->dstk = dma->resk = 0;
+}
+
+static void hifn_init_registers(struct hifn_device *dev)
+{
+	u32 dptr = dev->desc_dma;
+	
+	/* Initialization magic... */
+	hifn_write_0(dev, HIFN_0_PUCTRL, HIFN_PUCTRL_DMAENA);
+	hifn_write_0(dev, HIFN_0_FIFOCNFG, HIFN_FIFOCNFG_THRESHOLD);
+	hifn_write_0(dev, HIFN_0_PUIER, HIFN_PUIER_DSTOVER);
+
+	/* write all 4 ring address registers */
+	hifn_write_1(dev, HIFN_1_DMA_CRAR, __cpu_to_le32(dptr + offsetof(struct hifn_dma, cmdr[0])));
+	hifn_write_1(dev, HIFN_1_DMA_SRAR, __cpu_to_le32(dptr + offsetof(struct hifn_dma, srcr[0])));
+	hifn_write_1(dev, HIFN_1_DMA_DRAR, __cpu_to_le32(dptr + offsetof(struct hifn_dma, dstr[0])));
+	hifn_write_1(dev, HIFN_1_DMA_RRAR, __cpu_to_le32(dptr + offsetof(struct hifn_dma, resr[0])));
+
+	mdelay(2);
+#if 0	
+	hifn_write_1(dev, HIFN_1_DMA_CSR,
+	    HIFN_DMACSR_D_CTRL_DIS | HIFN_DMACSR_R_CTRL_DIS |
+	    HIFN_DMACSR_S_CTRL_DIS | HIFN_DMACSR_C_CTRL_DIS |
+	    HIFN_DMACSR_D_ABORT | HIFN_DMACSR_D_DONE | HIFN_DMACSR_D_LAST |
+	    HIFN_DMACSR_D_WAIT | HIFN_DMACSR_D_OVER |
+	    HIFN_DMACSR_R_ABORT | HIFN_DMACSR_R_DONE | HIFN_DMACSR_R_LAST |
+	    HIFN_DMACSR_R_WAIT | HIFN_DMACSR_R_OVER |
+	    HIFN_DMACSR_S_ABORT | HIFN_DMACSR_S_DONE | HIFN_DMACSR_S_LAST |
+	    HIFN_DMACSR_S_WAIT |
+	    HIFN_DMACSR_C_ABORT | HIFN_DMACSR_C_DONE | HIFN_DMACSR_C_LAST |
+	    HIFN_DMACSR_C_WAIT |
+	    HIFN_DMACSR_ENGINE | 
+	    HIFN_DMACSR_PUBDONE);
+#else
+	hifn_write_1(dev, HIFN_1_DMA_CSR,
+	    HIFN_DMACSR_C_CTRL_ENA | HIFN_DMACSR_S_CTRL_ENA |
+	    HIFN_DMACSR_D_CTRL_ENA | HIFN_DMACSR_R_CTRL_ENA |
+	    HIFN_DMACSR_D_ABORT | HIFN_DMACSR_D_DONE | HIFN_DMACSR_D_LAST |
+	    HIFN_DMACSR_D_WAIT | HIFN_DMACSR_D_OVER |
+	    HIFN_DMACSR_R_ABORT | HIFN_DMACSR_R_DONE | HIFN_DMACSR_R_LAST |
+	    HIFN_DMACSR_R_WAIT | HIFN_DMACSR_R_OVER |
+	    HIFN_DMACSR_S_ABORT | HIFN_DMACSR_S_DONE | HIFN_DMACSR_S_LAST |
+	    HIFN_DMACSR_S_WAIT |
+	    HIFN_DMACSR_C_ABORT | HIFN_DMACSR_C_DONE | HIFN_DMACSR_C_LAST |
+	    HIFN_DMACSR_C_WAIT |
+	    HIFN_DMACSR_ENGINE | 
+	    HIFN_DMACSR_PUBDONE);
+#endif
+	hifn_read_1(dev, HIFN_1_DMA_CSR);
+
+	dev->dmareg |= HIFN_DMAIER_R_DONE | HIFN_DMAIER_C_ABORT |
+	    HIFN_DMAIER_D_OVER | HIFN_DMAIER_R_OVER |
+	    HIFN_DMAIER_S_ABORT | HIFN_DMAIER_D_ABORT | HIFN_DMAIER_R_ABORT |
+	    HIFN_DMAIER_ENGINE;
+	dev->dmareg &= ~HIFN_DMAIER_C_WAIT;
+
+	hifn_write_1(dev, HIFN_1_DMA_IER, dev->dmareg);
+	hifn_read_1(dev, HIFN_1_DMA_IER);
+#if 0	
+	hifn_write_0(dev, HIFN_0_PUCNFG, HIFN_PUCNFG_ENCCNFG |
+		    HIFN_PUCNFG_DRFR_128 | HIFN_PUCNFG_TCALLPHASES |
+		    HIFN_PUCNFG_TCDRVTOTEM | HIFN_PUCNFG_BUS32 |
+		    HIFN_PUCNFG_DRAM);
+#else
+	hifn_write_0(dev, HIFN_0_PUCNFG, 0x10342);
+#endif
+	hifn_write_1(dev, HIFN_1_PLL, HIFN_PLL_7956);
+	
+	hifn_write_0(dev, HIFN_0_PUISR, HIFN_PUISR_DSTOVER);
+	hifn_write_1(dev, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET |
+	    HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE | HIFN_DMACNFG_LAST |
+	    ((HIFN_POLL_FREQUENCY << 16 ) & HIFN_DMACNFG_POLLFREQ) |
+	    ((HIFN_POLL_SCALAR << 8) & HIFN_DMACNFG_POLLINVAL));
+}
+
+static void hifn_init_sessions(struct hifn_device *dev)
+{
+	u32 pucnfg;
+	u32 ctxsize = 1;
+
+	pucnfg = hifn_read_0(dev, HIFN_0_PUCNFG);
+	
+	if (pucnfg & HIFN_PUCNFG_COMPSING) {
+		if (pucnfg & HIFN_PUCNFG_ENCCNFG)
+			ctxsize = 128;
+		else
+			ctxsize = 512;
+		
+		dev->max_sessions = dev->ram_size / ctxsize;
+	} else
+		dev->max_sessions = dev->ram_size / 16384;
+	
+	dprintk("Chip %s: ram size=%uK, max_sessions=%u.\n", 
+			dev->name, dev->ram_size / 1024,
+			dev->max_sessions);
+}
+
+static int hifn_setup_base_command(struct hifn_device *dev, u8 *buf, u16 dlen, u16 slen, u16 mask, u8 snum)
+{
+	struct hifn_base_command *base_cmd;
+	u8 *buf_pos = buf;
+	
+	base_cmd = (struct hifn_base_command *)buf_pos;
+	base_cmd->masks = __cpu_to_le16(mask);
+	base_cmd->total_source_count = __cpu_to_le16(slen & HIFN_BASE_CMD_LENMASK_LO);
+	base_cmd->total_dest_count = __cpu_to_le16(dlen & HIFN_BASE_CMD_LENMASK_LO);
+
+	dlen >>= 16;
+	slen >>= 16;
+	base_cmd->session_num = __cpu_to_le16(snum | 
+	    ((slen << HIFN_BASE_CMD_SRCLEN_S) & HIFN_BASE_CMD_SRCLEN_M) |
+	    ((dlen << HIFN_BASE_CMD_DSTLEN_S) & HIFN_BASE_CMD_DSTLEN_M));
+
+	return sizeof(struct hifn_base_command);
+}
+
+static int hifn_setup_crypto_command(struct hifn_device *dev, 
+		u8 *buf, u16 dlen, u16 slen, 
+		u8 *key, int keylen, u8 *iv, int ivlen, u16 mode)
+{
+	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+	struct hifn_crypt_command *cry_cmd;
+	u8 *buf_pos = buf;
+	u16 cmd_len;
+
+	cry_cmd = (struct hifn_crypt_command *)buf_pos;
+
+	cry_cmd->source_count = __cpu_to_le16(dlen & 0xffff);
+	dlen >>= 16;
+	cry_cmd->masks = __cpu_to_le16(mode | ((dlen << HIFN_CRYPT_CMD_SRCLEN_S) & HIFN_CRYPT_CMD_SRCLEN_M));
+	cry_cmd->header_skip = 0;
+	cry_cmd->reserved = 0;
+
+	buf_pos += sizeof(struct hifn_crypt_command);
+
+	dma->cmdu++;
+	if (dma->cmdu > 1) {
+		dev->dmareg |= HIFN_DMAIER_C_WAIT;
+		hifn_write_1(dev, HIFN_1_DMA_IER, dev->dmareg);
+	}
+
+	if (keylen) {
+		memcpy(buf_pos, key, keylen);
+		buf_pos += keylen;
+	}
+	if (ivlen) {
+		memcpy(buf_pos, iv, ivlen);
+		buf_pos += ivlen;
+	}
+
+	cmd_len = buf_pos - buf;
+
+	return cmd_len;
+}
+
+static int hifn_setup_src_desc(struct hifn_device *dev, struct scatterlist *sg, int nents)
+{
+	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+	int i, mapped_nsg, idx, nbytes;
+	u32 last = 0;
+	
+	mapped_nsg = pci_map_sg(dev->pdev, sg, nents, PCI_DMA_TODEVICE);
+	
+	idx = dma->srci;
+
+	nbytes = 0;
+	for (i=0; i<mapped_nsg; ++i) {
+		if (i == mapped_nsg - 1)
+			last = HIFN_D_LAST;
+		
+		dma->srcr[idx].p = __cpu_to_le32(sg_dma_address(&sg[i]));
+		dma->srcr[idx].l = __cpu_to_le32(sg_dma_len(&sg[i]) | HIFN_D_VALID | 
+				HIFN_D_MASKDONEIRQ | HIFN_D_NOINVALID | last);
+		
+		if (++idx == HIFN_D_SRC_RSIZE) {
+			dma->srcr[idx].l = __cpu_to_le32(HIFN_D_VALID | HIFN_D_JUMP | HIFN_D_MASKDONEIRQ | HIFN_D_LAST);
+			idx = 0;
+		}
+
+		nbytes += sg_dma_len(&sg[i]);
+	}
+		
+	dma->srci = idx;
+	dma->srcu += mapped_nsg;
+
+	if (!(dev->flags & HIFN_FLAG_SRC_BUSY)) {
+		hifn_write_1(dev, HIFN_1_DMA_CSR, HIFN_DMACSR_S_CTRL_ENA);
+		dev->flags |= HIFN_FLAG_SRC_BUSY;
+	}
+	
+	return nbytes;
+}
+
+static void hifn_setup_res_desc(struct hifn_device *dev)
+{
+	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+	
+	dma->resr[dma->resi].l = __cpu_to_le32(12 | HIFN_D_VALID | HIFN_D_LAST);
+	//dma->resr[dma->resi].l = __cpu_to_le32(HIFN_MAX_RESULT | HIFN_D_VALID | HIFN_D_LAST | HIFN_D_NOINVALID);
+	
+	if (++dma->resi == HIFN_D_RES_RSIZE) {
+		dma->resr[HIFN_D_RES_RSIZE].l = __cpu_to_le32(HIFN_D_VALID | HIFN_D_JUMP | HIFN_D_MASKDONEIRQ | HIFN_D_LAST);
+		dma->resi = 0;
+	}
+	
+	dma->resu++;
+
+	if (!(dev->flags & HIFN_FLAG_RES_BUSY)) {
+		hifn_write_1(dev, HIFN_1_DMA_CSR, HIFN_DMACSR_R_CTRL_ENA);
+		dev->flags |= HIFN_FLAG_RES_BUSY;
+	}
+}
+
+static void hifn_setup_dst_desc(struct hifn_device *dev, struct scatterlist *sg, int mapped_nsg)
+{
+	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+	int idx, i;
+	u32 last = 0;
+	
+	mapped_nsg = pci_map_sg(dev->pdev, sg, mapped_nsg, PCI_DMA_FROMDEVICE);
+
+	idx = dma->dsti;
+
+	for (i=0; i<mapped_nsg; ++i) {
+		if (i == mapped_nsg - 1)
+			last = HIFN_D_LAST;
+		
+		dma->dstr[idx].p = __cpu_to_le32(sg_dma_address(&sg[i]));
+		dma->dstr[idx].l = __cpu_to_le32(sg_dma_len(&sg[i]) | HIFN_D_VALID | 
+				HIFN_D_MASKDONEIRQ | HIFN_D_NOINVALID | last);
+		
+		if (++idx == HIFN_D_DST_RSIZE) {
+			dma->dstr[idx].l = __cpu_to_le32(HIFN_D_VALID | HIFN_D_JUMP | 
+					HIFN_D_MASKDONEIRQ | HIFN_D_LAST | HIFN_D_NOINVALID);
+			idx = 0;
+		}
+	}
+	dma->dsti = idx;
+	dma->dstu += mapped_nsg;
+
+	if (!(dev->flags & HIFN_FLAG_DST_BUSY)) {
+		hifn_write_1(dev, HIFN_1_DMA_CSR, HIFN_DMACSR_D_CTRL_ENA);
+		dev->flags |= HIFN_FLAG_DST_BUSY;
+	}
+	
+}
+
+int hifn_setup_session(struct hifn_device *dev, 
+		struct scatterlist *src, int src_num,
+		struct scatterlist *dst, int dst_num, 
+		u8 *key, int keysize, u8 *iv, int ivsize,
+		u16 op, u16 type, u16 mode, u8 snum,
+		void *priv)
+{
+	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+	int nbytes, cmd_len, sa_idx, err = -EINVAL;
+	u8 *buf, *buf_pos;
+	u16 mask;
+	u16 dlen, slen;
+
+	dlen = sg_dma_len(dst);
+	slen = sg_dma_len(src);
+	sa_idx = dma->resi;
+
+	key = iv = NULL;
+	keysize = ivsize = 0;
+
+	nbytes = hifn_setup_src_desc(dev, src, src_num);
+
+	buf = dma->command_bufs[dma->cmdi];
+
+	buf_pos = buf;
+
+	mask = 0;
+	switch (op) {
+		case ACRYPTO_OP_DECRYPT:
+			mask = HIFN_BASE_CMD_CRYPT | HIFN_BASE_CMD_DECODE;
+			break;
+		case ACRYPTO_OP_ENCRYPT:
+			mask = HIFN_BASE_CMD_CRYPT;
+			break;
+		case ACRYPTO_OP_HMAC:
+			mask = HIFN_BASE_CMD_MAC;
+			break;
+		default:
+			goto err_out;
+	}
+	
+	buf_pos += hifn_setup_base_command(dev, buf_pos, dlen, slen, mask, snum);
+	
+	if (op == ACRYPTO_OP_ENCRYPT || op == ACRYPTO_OP_DECRYPT) {
+		u16 md;
+		md = 0;
+		
+		if (key)
+			md |= HIFN_CRYPT_CMD_NEW_KEY;
+		if (iv && mode != ACRYPTO_MODE_ECB)
+			md |= HIFN_CRYPT_CMD_NEW_IV;
+
+		dprintk("%s: iv: %p [%d], key: %p [%d], mode: %u, op: %u, type: %u.\n",
+			dev->name, iv, ivsize, key, keysize,
+			mode, op, type);
+
+		switch (mode) {
+			case ACRYPTO_MODE_ECB:
+				md |= HIFN_CRYPT_CMD_MODE_ECB;
+				break;
+			case ACRYPTO_MODE_CBC:
+				md |= HIFN_CRYPT_CMD_MODE_CBC;
+				break;
+			case ACRYPTO_MODE_CFB:
+				md |= HIFN_CRYPT_CMD_MODE_CFB;
+				break;
+			case ACRYPTO_MODE_OFB:
+				md |= HIFN_CRYPT_CMD_MODE_OFB;
+				break;
+			default:
+				goto err_out;
+		}
+
+		switch (type) {
+			case ACRYPTO_TYPE_AES_128:
+				if (keysize != 16)
+					goto err_out;
+				md |= HIFN_CRYPT_CMD_KSZ_128 | HIFN_CRYPT_CMD_ALG_AES;
+				break;
+			case ACRYPTO_TYPE_AES_192:
+				if (keysize != 24)
+					goto err_out;
+				md |= HIFN_CRYPT_CMD_KSZ_192 | HIFN_CRYPT_CMD_ALG_AES;
+				break;
+			case ACRYPTO_TYPE_AES_256:
+				if (keysize != 32)
+					goto err_out;
+				md |= HIFN_CRYPT_CMD_KSZ_256 | HIFN_CRYPT_CMD_ALG_AES;
+				break;
+			default:
+				goto err_out;
+		}
+
+		buf_pos += hifn_setup_crypto_command(dev, buf_pos, dlen, slen, key, keysize, iv, ivsize, md);
+	}
+
+	dev->sa[sa_idx] = priv;
+	
+	cmd_len = buf_pos - buf;
+	dma->cmdr[dma->cmdi].l = __cpu_to_le32(cmd_len | HIFN_D_VALID | HIFN_D_LAST | HIFN_D_MASKDONEIRQ);
+
+	if (++dma->cmdi == HIFN_D_CMD_RSIZE) {
+		dma->cmdr[dma->cmdi].l = __cpu_to_le32(HIFN_MAX_COMMAND | HIFN_D_VALID | 
+				HIFN_D_LAST | HIFN_D_MASKDONEIRQ | HIFN_D_JUMP);
+		dma->cmdi = 0;
+	} else
+		dma->cmdr[dma->cmdi-1].l |= __cpu_to_le32(HIFN_D_VALID);
+	
+	if (!(dev->flags & HIFN_FLAG_CMD_BUSY)) {
+		hifn_write_1(dev, HIFN_1_DMA_CSR, HIFN_DMACSR_C_CTRL_ENA);
+		dev->flags |= HIFN_FLAG_CMD_BUSY;
+	}
+	
+	hifn_setup_res_desc(dev);
+	hifn_setup_dst_desc(dev, dst, dst_num);
+
+	mb();
+
+	dev->active = 5;
+	err = 0;
+
+err_out:
+
+	if (err && printk_ratelimit())
+		printk("%s: iv: %p [%d], key: %p [%d], mode: %u, op: %u, type: %u.\n",
+			dev->name, iv, ivsize, key, keysize,
+			mode, op, type);
+
+	return err;
+}
+
+static int hifn_test(struct hifn_device *dev, int encdec, u8 snum)
+{
+	int n, err;
+	u8 key[16], *p;
+	u8 src[16];
+	struct scatterlist s;
+	u8 fips_aes_ecb_from_zero[16] = {
+		0x66, 0xE9, 0x4B, 0xD4, 
+		0xEF, 0x8A, 0x2C, 0x3B, 
+		0x88, 0x4C, 0xFA, 0x59, 
+		0xCA, 0x34, 0x2B, 0x2E};
+	
+	n = 0;
+	
+	dprintk("%s: session number=%02x:\n", (encdec)?"Encoding":"Decoding", snum);
+	
+	memset(src, 0, sizeof(src));
+	memset(key, 0, sizeof(key));
+
+	s.page 		= virt_to_page(src);
+	s.offset	= offset_in_page(src);
+	s.length	= sizeof(src);
+
+	err = hifn_setup_session(dev, 
+			&s, 1, 
+			&s, 1, 
+			key, sizeof(key), NULL, 0,
+			(encdec)?ACRYPTO_OP_ENCRYPT:ACRYPTO_OP_DECRYPT, 
+			ACRYPTO_TYPE_AES_128, ACRYPTO_MODE_ECB,
+			snum, NULL);
+	if (err)
+		goto err_out;
+
+	msleep(200);
+
+#ifdef HIFN_DEBUG
+	{
+		struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+		
+		dprintk("cmd: i=%d, u=%d, k=%d\n", dma->cmdi, dma->cmdu, dma->cmdk);
+		dprintk("src: i=%d, u=%d, k=%d\n", dma->srci, dma->srcu, dma->srck);
+		dprintk("dst: i=%d, u=%d, k=%d\n", dma->dsti, dma->dstu, dma->dstk);
+		dprintk("res: i=%d, u=%d, k=%d\n", dma->resi, dma->resu, dma->resk);
+	}
+#endif
+
+	dprintk("%s: decoded: ", dev->name);
+	for (n=0; n<sizeof(src); ++n)
+		dprintk("%02x ", src[n]);
+	dprintk("\n");
+	dprintk("%s: FIPS   : ", dev->name);
+	for (n=0; n<sizeof(fips_aes_ecb_from_zero); ++n)
+		dprintk("%02x ", fips_aes_ecb_from_zero[n]);
+	dprintk("\n");
+
+	p = page_address(s.page) + s.offset;
+	if (!memcmp(p, fips_aes_ecb_from_zero, sizeof(fips_aes_ecb_from_zero))) {
+		printk(KERN_INFO "%s: AES 128 ECB test has been successfully passed.\n", dev->name);
+		return 0;
+	}
+	
+err_out:
+	printk(KERN_INFO "%s: AES 128 ECB test has been failed.\n", dev->name);
+	return -1;
+}
+
+static void hifn_work(struct work_struct *);
+
+static int hifn_start_device(struct hifn_device *dev)
+{
+	int err;
+
+	dev->ram_size = 32768;
+	
+	hifn_reset_dma(dev, 1);
+	
+	err = hifn_enable_crypto(dev);
+	if (err)
+		return err;
+
+	hifn_reset_puc(dev);
+
+	hifn_init_dma(dev);
+
+	hifn_init_registers(dev);
+
+	hifn_init_sessions(dev);
+	
+	hifn_init_pubrng(dev);
+
+	INIT_DELAYED_WORK(&dev->work, hifn_work);
+	schedule_delayed_work(&dev->work, HZ);
+	
+	return 0;
+}
+
+static void hifn_process_ready(struct ablkcipher_request *req, int error)
+{
+	req->base.complete(req->base.data, error);
+}
+
+static void hifn_check_for_completion(struct hifn_device *dev, int error)
+{
+	int i;
+	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+
+	for (i=0; i<HIFN_D_RES_RSIZE; ++i) {
+		struct hifn_desc *d = &dma->resr[i];
+
+		if (!(d->l & __cpu_to_le32(HIFN_D_VALID)) && dev->sa[i]) {
+			dev->success++;
+			dev->reset = 0;
+			hifn_process_ready(dev->sa[i], error);
+			
+			dev->started--;
+			BUG_ON(dev->started < 0);
+			
+			dev->sa[i] = NULL;
+		}
+
+		if (d->l & __cpu_to_le32(HIFN_D_DESTOVER | HIFN_D_OVER)) {
+			int j;
+
+			if (printk_ratelimit())
+				printk("%s: overflow detected [d: %u, o: %u] in %d resr: l: %08x, p: %08x.\n",
+					dev->name, !!(d->l & __cpu_to_le32(HIFN_D_DESTOVER)),
+					!!(d->l & __cpu_to_le32(HIFN_D_OVER)),
+					i, d->l, d->p);
+
+			for (j=0; j<HIFN_D_RES_RSIZE; ++j) {
+				struct hifn_desc *dst = &dma->resr[j];
+
+				if (printk_ratelimit())
+					printk("%s: over: i: %d, j: %d, d: %u, o: %u, l: %u, dlen: %u, l: %08x, p: %08x.\n",
+						dev->name, i, j,
+						!!(dst->l & __cpu_to_le32(HIFN_D_DESTOVER)), 
+						!!(dst->l & __cpu_to_le32(HIFN_D_OVER)), 
+						!!(dst->l & __cpu_to_le32(HIFN_D_LAST)),
+						dst->l & __cpu_to_le32(HIFN_D_LENGTH),
+						dst->l, dst->p);
+			}
+		}
+	}
+}
+
+static void hifn_clear_rings(struct hifn_device *dev)
+{
+	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+	int i, u;
+	
+	dprintk("%s: ring cleanup 1: i: %d.%d.%d.%d, u: %d.%d.%d.%d, k: %d.%d.%d.%d.\n", 
+			dev->name,
+			dma->cmdi, dma->srci, dma->dsti, dma->resi,
+			dma->cmdu, dma->srcu, dma->dstu, dma->resu, 
+			dma->cmdk, dma->srck, dma->dstk, dma->resk); 
+	
+	i = dma->resk; u = dma->resu;
+	while (u != 0) {
+		if (dma->resr[i].l & __cpu_to_le32(HIFN_D_VALID))
+			break;
+
+		if (i != HIFN_D_RES_RSIZE)
+			u--;
+
+		if (++i == (HIFN_D_RES_RSIZE + 1))
+			i = 0;
+	}
+	dma->resk = i; dma->resu = u;
+
+	i = dma->srck; u = dma->srcu;
+	while (u != 0) {
+		if (i == HIFN_D_SRC_RSIZE)
+			i = 0;
+		if (dma->srcr[i].l & __cpu_to_le32(HIFN_D_VALID))
+			break;
+		i++, u--;
+	}
+	dma->srck = i; dma->srcu = u;
+
+	i = dma->cmdk; u = dma->cmdu;
+	while (u != 0) {
+		if (dma->cmdr[i].l & __cpu_to_le32(HIFN_D_VALID))
+			break;
+		if (i != HIFN_D_CMD_RSIZE)
+			u--;
+		if (++i == (HIFN_D_CMD_RSIZE + 1))
+			i = 0;
+	}
+	dma->cmdk = i; dma->cmdu = u;
+
+	i = dma->dstk; u = dma->dstu;
+	while (u != 0) {
+		if (i == HIFN_D_DST_RSIZE)
+			i = 0;
+		if (dma->dstr[i].l & __cpu_to_le32(HIFN_D_VALID))
+			break;
+		i++, u--;
+	}
+	dma->dstk = i; dma->dstu = u;
+
+	dprintk("%s: ring cleanup 2: i: %d.%d.%d.%d, u: %d.%d.%d.%d, k: %d.%d.%d.%d.\n", 
+			dev->name,
+			dma->cmdi, dma->srci, dma->dsti, dma->resi,
+			dma->cmdu, dma->srcu, dma->dstu, dma->resu, 
+			dma->cmdk, dma->srck, dma->dstk, dma->resk); 
+}
+
+static void hifn_work(struct work_struct *work)
+{
+	struct delayed_work *dw = container_of(work, struct delayed_work, work);
+	struct hifn_device *dev = container_of(dw, struct hifn_device, work);
+	unsigned long flags;
+	int reset = 0;
+	u32 r = 0;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	if (dev->active == 0) {
+		struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+
+		if (dma->cmdu == 0 && (dev->flags & HIFN_FLAG_CMD_BUSY)) {
+			dev->flags &= ~HIFN_FLAG_CMD_BUSY;
+			r |= HIFN_DMACSR_C_CTRL_DIS;
+		}
+		if (dma->srcu == 0 && (dev->flags & HIFN_FLAG_SRC_BUSY)) {
+			dev->flags &= ~HIFN_FLAG_SRC_BUSY;
+			r |= HIFN_DMACSR_S_CTRL_DIS;
+		}
+		if (dma->dstu == 0 && (dev->flags & HIFN_FLAG_DST_BUSY)) {
+			dev->flags &= ~HIFN_FLAG_DST_BUSY;
+			r |= HIFN_DMACSR_D_CTRL_DIS;
+		}
+		if (dma->resu == 0 && (dev->flags & HIFN_FLAG_RES_BUSY)) {
+			dev->flags &= ~HIFN_FLAG_RES_BUSY;
+			r |= HIFN_DMACSR_R_CTRL_DIS;
+		}
+		if (r)
+			hifn_write_1(dev, HIFN_1_DMA_CSR, r);
+	} else
+		dev->active--;
+
+	if (dev->prev_success == dev->success && dev->started)
+		reset = 1;
+	dev->prev_success = dev->success;
+
+	dprintk("%s: r: %08x, active: %d, started: %d, wait: %lu, break: %lu, intr: %lu, success: %lu: dfailed: %lu.\n", 
+			dev->name, r, dev->active, dev->started,
+			dev->waiting, dev->break_session, dev->intr, dev->success, dev->dequeue_failed);
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	if (reset) {
+		if (++dev->reset >= 5) {
+			dprintk("%s: reset.\n", dev->name);
+			hifn_reset_dma(dev, 1);
+			hifn_stop_device(dev);
+			hifn_start_device(dev);
+		}
+		
+		spin_lock_irqsave(&dev->lock, flags);
+		hifn_check_for_completion(dev, -EBUSY);
+		hifn_clear_rings(dev);
+		dev->reset = 0;
+		spin_unlock_irqrestore(&dev->lock, flags);
+	}
+
+	schedule_delayed_work(&dev->work, HZ);
+}
+
+static irqreturn_t hifn_interrupt(int irq, void *data)
+{
+	struct hifn_device *dev = (struct hifn_device *)data;
+	struct hifn_dma *dma = (struct hifn_dma *)dev->desc_virt;
+	u32 dmacsr, restart;
+	
+	dmacsr = hifn_read_1(dev, HIFN_1_DMA_CSR);
+	
+	{
+		dmacsr = hifn_read_1(dev, HIFN_1_DMA_CSR);
+		/*
+		dprintk("%s: 1 dmacsr: %08x, dmareg: %08x, res: %08x [%d.%d], i: %d.%d.%d.%d, u: %d.%d.%d.%d.\n", 
+			dev->name, dmacsr, dev->dmareg, dmacsr & dev->dmareg, dma->cmdi, cmdi_calc,
+			dma->cmdu, dma->srcu, dma->dstu, dma->resu, 
+			dma->cmdi, dma->srci, dma->dsti, dma->resi);
+		*/
+
+		dprintk("%s: 1 dmacsr: %08x, dmareg: %08x, res: %08x [%d], i: %d.%d.%d.%d, u: %d.%d.%d.%d.\n", 
+			dev->name, dmacsr, dev->dmareg, dmacsr & dev->dmareg, dma->cmdi,
+			dma->cmdu, dma->srcu, dma->dstu, dma->resu, 
+			dma->cmdi, dma->srci, dma->dsti, dma->resi);
+	}
+	
+	if ((dmacsr & dev->dmareg) == 0)
+		goto out;
+
+	dev->intr++;
+
+	hifn_write_1(dev, HIFN_1_DMA_CSR, dmacsr & dev->dmareg);
+	
+	if (dmacsr & HIFN_DMACSR_ENGINE)
+		hifn_write_0(dev, HIFN_0_PUISR, hifn_read_0(dev, HIFN_0_PUISR));
+	if (dmacsr & HIFN_DMACSR_PUBDONE)
+		hifn_write_1(dev, HIFN_1_PUB_STATUS, hifn_read_1(dev, HIFN_1_PUB_STATUS) | HIFN_PUBSTS_DONE);
+	
+	restart = dmacsr & (HIFN_DMACSR_R_OVER | HIFN_DMACSR_D_OVER);
+	if (restart) {
+		u32 puisr = hifn_read_0(dev, HIFN_0_PUISR);
+		
+		if (printk_ratelimit())
+			printk("%s: overflow: r: %d, d: %d, puisr: %08x, d: %u.\n", 
+				dev->name, !!(dmacsr & HIFN_DMACSR_R_OVER), 
+				!!(dmacsr & HIFN_DMACSR_D_OVER),
+				puisr, !!(puisr & HIFN_PUISR_DSTOVER));
+		if (!!(puisr & HIFN_PUISR_DSTOVER))
+			hifn_write_0(dev, HIFN_0_PUISR, HIFN_PUISR_DSTOVER);
+		hifn_write_1(dev, HIFN_1_DMA_CSR, dmacsr & (HIFN_DMACSR_R_OVER | HIFN_DMACSR_D_OVER));
+	}
+	
+	restart = dmacsr & (HIFN_DMACSR_C_ABORT | HIFN_DMACSR_S_ABORT | HIFN_DMACSR_D_ABORT | HIFN_DMACSR_R_ABORT);
+	if (restart) {
+		if (printk_ratelimit())
+			printk("%s: abort: c: %d, s: %d, d: %d, r: %d.\n", 
+				dev->name, !!(dmacsr & HIFN_DMACSR_C_ABORT), !!(dmacsr & HIFN_DMACSR_S_ABORT), 
+				!!(dmacsr & HIFN_DMACSR_D_ABORT), !!(dmacsr & HIFN_DMACSR_R_ABORT));
+		hifn_reset_dma(dev, 1);
+		hifn_init_dma(dev);
+		hifn_init_registers(dev);
+	}
+	
+	if ((dmacsr & HIFN_DMACSR_C_WAIT) && (dma->cmdu == 0)) {
+		dev->waiting++;
+		dprintk("%s: wait on command.\n", dev->name);
+		dev->dmareg &= ~(HIFN_DMAIER_C_WAIT);
+		hifn_write_1(dev, HIFN_1_DMA_IER, dev->dmareg);
+	}
+
+	hifn_check_for_completion(dev, 0);
+	hifn_clear_rings(dev);
+
+out:
+	return IRQ_HANDLED;
+}
+
+static int hifn_setkey(struct crypto_ablkcipher *cipher, const u8 *key, unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	struct hifn_device *dev = crypto_tfm_ctx(tfm);
+
+	if (len > HIFN_MAX_CRYPT_KEY_LENGTH)
+		return -1;
+
+	if (!memcmp(dev->current_key, key, len)) {
+		dev->flags |= HIFN_FLAG_OLD_KEY;
+		return 0;
+	}
+
+	dev->flags &= ~HIFN_FLAG_OLD_KEY;
+
+	memcpy(dev->current_key, key, len);
+	dev->current_key_len = len;
+
+	return 0;
+}
+
+static int hifn_encrypt(struct ablkcipher_request *req)
+{
+	struct hifn_device *dev = ablkcipher_request_ctx(req);
+	int err = -EBUSY;
+	unsigned long flags;
+	u8 type = ACRYPTO_TYPE_AES_128;
+
+	/*
+	 * Type and mode must be obtained from tfm/req.
+	 */
+
+	if (dev->current_key_len == 16)
+		type = ACRYPTO_TYPE_AES_128;
+	else if (dev->current_key_len == 24)
+		type = ACRYPTO_TYPE_AES_192;
+	else if (dev->current_key_len == 32)
+		type = ACRYPTO_TYPE_AES_256;
+	
+	spin_lock_irqsave(&dev->lock, flags);
+	if (dev->started < HIFN_QUEUE_LENGTH) {
+		err = hifn_setup_session(dev, req->src, 1, req->dst, 1, 
+				(dev->flags & HIFN_FLAG_OLD_KEY)?NULL:dev->current_key, dev->current_key_len,
+				NULL, 0, 
+				ACRYPTO_OP_ENCRYPT, type, ACRYPTO_MODE_ECB, dev->snum, req);
+		dev->snum++;
+		dev->started++;
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+
+	/*
+	 * HEAVY TODO: needs to kick Herbert XU to write documentation.
+	 * HEAVY TODO: needs to kick Herbert XU to write documentation.
+	 * HEAVY TODO: needs to kick Herbert XU to write documentation.
+	 *
+	 * Actually I need to think about how to handle the case, when queue is full.
+	 * So far error (-EINVAL) is returned.
+	 */
+
+	return err;
+}
+
+static int hifn_decrypt(struct ablkcipher_request *req)
+{
+	struct hifn_device *dev = ablkcipher_request_ctx(req);
+	int err = -EBUSY;
+	unsigned long flags;
+	u8 type = ACRYPTO_TYPE_AES_128;
+
+	if (dev->current_key_len == 16)
+		type = ACRYPTO_TYPE_AES_128;
+	else if (dev->current_key_len == 24)
+		type = ACRYPTO_TYPE_AES_192;
+	else if (dev->current_key_len == 32)
+		type = ACRYPTO_TYPE_AES_256;
+	
+	spin_lock_irqsave(&dev->lock, flags);
+	if (dev->started < HIFN_QUEUE_LENGTH) {
+		err = hifn_setup_session(dev, req->src, 1, req->dst, 1, 
+				(dev->flags & HIFN_FLAG_OLD_KEY)?NULL:dev->current_key, dev->current_key_len,
+				NULL, 0, 
+				ACRYPTO_OP_DECRYPT, ACRYPTO_TYPE_AES_128, ACRYPTO_MODE_ECB, dev->snum, req);
+		dev->snum++;
+		dev->started++;
+	}
+	spin_unlock_irqrestore(&dev->lock, flags);
+	return err;
+}
+
+
+#define AES_MIN_KEY_SIZE	16
+#define AES_MAX_KEY_SIZE	32
+#define AES_BLOCK_SIZE		16
+
+static struct crypto_alg hifn_alg = {
+	.cra_name		=	"aes",
+	.cra_driver_name	=	"hifn-aes",
+	.cra_priority		=	200,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct hifn_device),
+	.cra_alignmask		=	15,
+	.cra_type		=	&crypto_blkcipher_type,
+	.cra_module		=	THIS_MODULE,
+	.cra_u			=	{
+		.ablkcipher = {
+			.min_keysize		=	AES_MIN_KEY_SIZE,
+			.max_keysize		=	AES_MAX_KEY_SIZE,
+			.setkey			=	hifn_setkey,
+			.encrypt		=	hifn_encrypt,
+			.decrypt		=	hifn_decrypt,
+		}
+	}
+};
+
+static int hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	int err, i;
+	struct hifn_device *dev;
+	
+	err = pci_enable_device(pdev);
+	if (err) {
+		dprintk("%s: Failed to enable device.\n", pci_name(pdev));
+		return err;
+	}
+	pci_set_master(pdev);
+
+	err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+	if (err) {
+		dprintk("%s: Failed to setup DMA.\n", pci_name(pdev));
+		goto err_out_disable_pci_device;
+	}
+
+	err = pci_request_regions(pdev, "hifn59x");
+	if (err) {
+		dprintk("%s: Failed to request I/O regions.\n", pci_name(pdev));
+		goto err_out_disable_pci_device;
+	}
+	
+	if (pci_resource_len(pdev, 0) < HIFN_BAR0_SIZE ||
+	    pci_resource_len(pdev, 1) < HIFN_BAR1_SIZE ||
+	    pci_resource_len(pdev, 2) < HIFN_BAR2_SIZE) {
+		dprintk("%s: Broken hardware - I/O regions are too small.\n", pci_name(pdev));
+		err = -ENODEV;
+		goto err_out_free_regions;
+	}
+
+	dev = (struct hifn_device *)kzalloc(sizeof(struct hifn_device) + sizeof(struct crypto_alg), 
+			GFP_KERNEL);
+	if (!dev) {
+		dprintk("Failed to allocate hifn_device structure.\n");
+		err = -ENOMEM;
+		goto err_out_free_regions;
+	}
+	
+	dev->alg = (struct crypto_alg *)(dev + 1);
+
+	snprintf(dev->name, sizeof(dev->name), "hifn%d", hifn_dev_number);
+	spin_lock_init(&dev->lock);
+
+	for (i=0; i<3; ++i) {
+		unsigned long addr, size;
+
+		addr = pci_resource_start(pdev, i);
+		size = pci_resource_len(pdev, i);
+
+		dev->bar[i] = ioremap_nocache(addr, size);
+		if (!dev->bar[i]) {
+			dprintk("Failed to remap %d PCI bar: addr=0x%lx, size=0x%lx.\n", 
+					i, addr, size);
+			goto err_out_unmap_bars;
+		}
+	}
+
+	dev->result_mem = __get_free_pages(GFP_KERNEL, HIFN_MAX_RESULT_ORDER);
+	if (!dev->result_mem) {
+		dprintk("Failed to allocate %d pages for result_mem.\n", HIFN_MAX_RESULT_ORDER);
+		goto err_out_unmap_bars;
+	}
+	memset((void *)dev->result_mem, 0, PAGE_SIZE*(1<<HIFN_MAX_RESULT_ORDER));
+
+	dev->dst = pci_map_single(pdev, (void *)dev->result_mem, PAGE_SIZE << HIFN_MAX_RESULT_ORDER, PCI_DMA_FROMDEVICE);
+
+	dev->desc_virt = pci_alloc_consistent(pdev, sizeof(struct hifn_dma), &dev->desc_dma);
+	if (!dev->desc_virt) {
+		dprintk("Failed to allocate descriptor rings.\n");
+		goto err_out_free_result_pages;
+	}
+	memset(dev->desc_virt, 0, sizeof(struct hifn_dma));
+
+	dev->pdev = pdev;
+	dev->irq = pdev->irq;
+
+	for (i=0; i<HIFN_D_RES_RSIZE; ++i)
+		dev->sa[i] = NULL;
+
+	pci_set_drvdata(pdev, dev);
+	
+	err = request_irq(dev->irq, hifn_interrupt, SA_SHIRQ, dev->name, dev);
+	if (err) {
+		dprintk("Failed to request IRQ%d: err=%d.\n", dev->irq, err);
+		dev->irq = 0;
+		goto err_out_free_desc;
+	}
+
+	err = hifn_start_device(dev);
+	if (err)
+		goto err_out_free_irq;
+
+	err = hifn_test(dev, 1, 0);
+	if (err)
+		goto err_out_stop_device;
+
+
+	memcpy(dev->alg, &hifn_alg, sizeof(struct crypto_alg));
+	snprintf(dev->alg->cra_driver_name, sizeof(dev->alg->cra_driver_name), "hifn-aes-%d", hifn_dev_number);
+	INIT_LIST_HEAD(&dev->alg->cra_list);
+
+	err = crypto_register_alg(dev->alg);
+	if (err)
+		goto err_out_stop_device;
+
+	hifn_dev_number++;
+
+	dprintk("HIFN crypto accelerator card at %s has been successfully registered as %s: id=%08x.\n",
+			pci_name(pdev), dev->name, hifn_read_1(dev, HIFN_CHIP_ID));
+
+	return 0;
+
+err_out_stop_device:
+	hifn_reset_dma(dev, 1);
+	hifn_stop_device(dev);
+err_out_free_irq:
+	free_irq(dev->irq, dev->name);
+err_out_free_desc:
+	pci_free_consistent(pdev, sizeof(struct hifn_dma), dev->desc_virt, dev->desc_dma);
+
+err_out_free_result_pages:
+	pci_unmap_single(pdev, dev->dst, PAGE_SIZE << HIFN_MAX_RESULT_ORDER, PCI_DMA_FROMDEVICE);
+	free_pages(dev->result_mem, HIFN_MAX_RESULT_ORDER);
+
+err_out_unmap_bars:
+	for (i=0; i<3; ++i)
+		if (dev->bar[i])
+			iounmap(dev->bar[i]);
+	
+err_out_free_regions:
+	pci_release_regions(pdev);
+	
+err_out_disable_pci_device:
+	pci_disable_device(pdev);
+
+	return err;
+}
+
+static void hifn_remove(struct pci_dev *pdev)
+{
+	int i;
+	struct hifn_device *dev;
+	
+	dev = pci_get_drvdata(pdev);
+
+	if (dev) {
+		cancel_rearming_delayed_work(&dev->work);
+		flush_scheduled_work();
+
+		crypto_unregister_alg(dev->alg);
+		hifn_reset_dma(dev, 1);
+		hifn_stop_device(dev);
+
+		free_irq(dev->irq, dev->name);
+		pci_free_consistent(pdev, sizeof(struct hifn_dma), dev->desc_virt, dev->desc_dma);
+		pci_unmap_single(pdev, dev->dst, PAGE_SIZE << HIFN_MAX_RESULT_ORDER, PCI_DMA_FROMDEVICE);
+		free_pages(dev->result_mem, HIFN_MAX_RESULT_ORDER);
+		for (i=0; i<3; ++i)
+			if (dev->bar[i])
+				iounmap(dev->bar[i]);
+
+		kfree(dev);
+	}
+	
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static struct pci_device_id hifn_pci_tbl[] = {
+	{ PCI_VENDOR_ID_HIFN, PCI_DEVICE_ID_HIFN_7955, PCI_ANY_ID, PCI_ANY_ID },
+	{ PCI_VENDOR_ID_HIFN, PCI_DEVICE_ID_HIFN_7956, PCI_ANY_ID, PCI_ANY_ID },
+	{ 0 }
+};
+MODULE_DEVICE_TABLE(pci, hifn_pci_tbl);
+
+static struct pci_driver hifn_pci_driver = {
+	.name     = "hifn795x",
+	.id_table = hifn_pci_tbl,
+	.probe    = hifn_probe,
+	.remove   = __devexit_p(hifn_remove),
+};
+
+static int __devinit hifn_init(void)
+{
+	int err;
+	
+	err = pci_register_driver(&hifn_pci_driver);
+	if (err < 0) {
+		dprintk("Failed to register PCI driver for %s device.\n", hifn_pci_driver.name);
+		return -ENODEV;
+	}
+
+	printk(KERN_INFO "Driver for HIFN 795x crypto accelerator chip has been successfully registered.\n");
+	
+	return 0;
+}
+
+static void __devexit hifn_fini(void)
+{
+	pci_unregister_driver(&hifn_pci_driver);
+	
+	printk(KERN_INFO "Driver for HIFN 795x crypto accelerator chip has been successfully deregistered.\n");
+}
+
+module_init(hifn_init);
+module_exit(hifn_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_DESCRIPTION("Driver for HIFN 795x crypto accelerator chip.");

-- 
	Evgeniy Polyakov

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-22 12:58 [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi Evgeniy Polyakov
@ 2007-05-22 15:19 ` Sebastian Siewior
  2007-05-23  8:03   ` Evgeniy Polyakov
  2007-05-25  8:14 ` Herbert Xu
  1 sibling, 1 reply; 15+ messages in thread
From: Sebastian Siewior @ 2007-05-22 15:19 UTC (permalink / raw)
  To: linux-crypto; +Cc: Evgeniy Polyakov

* Evgeniy Polyakov | 2007-05-22 16:58:29 [+0400]:

>Current driver only supports AES ECB encrypt/decrypt, since I do not 
>know how to detect operation mode in runtime (a question).
Take a look on my skeleton driver (posted just a few seconds ago). It
should solve your problem here.

Sebastian

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-22 15:19 ` Sebastian Siewior
@ 2007-05-23  8:03   ` Evgeniy Polyakov
  2007-05-23 10:02     ` Sebastian Siewior
  2007-05-25  8:21     ` Herbert Xu
  0 siblings, 2 replies; 15+ messages in thread
From: Evgeniy Polyakov @ 2007-05-23  8:03 UTC (permalink / raw)
  To: linux-crypto; +Cc: Sebastian Siewior

On Tue, May 22, 2007 at 05:19:19PM +0200, Sebastian Siewior (linux-crypto@ml.breakpoint.cc) wrote:
> * Evgeniy Polyakov | 2007-05-22 16:58:29 [+0400]:
> 
> >Current driver only supports AES ECB encrypt/decrypt, since I do not 
> >know how to detect operation mode in runtime (a question).
> Take a look on my skeleton driver (posted just a few seconds ago). It
> should solve your problem here.

It does not - your code only supposed to work with ecb, since it is what
was requested during initialization time. This new scheme with templates
helps alot for ciphers/crypto modes which do not support several
templates, so I used old scheme with 'cipher' only template, not
'ecb(cipher)', 'cbc(cipher)' and so on.

And, btw, do not use mutex in encryption path, it is not supposed to
sleep in ipsec.

HIFN supports at least 12 different ciphers/mode (3des, des and aes,
each one with 4 modes), so it is not a good idea to put them all into
separated structures, so I rised a question about it.

> Sebastian

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-23  8:03   ` Evgeniy Polyakov
@ 2007-05-23 10:02     ` Sebastian Siewior
  2007-05-23 12:30       ` Evgeniy Polyakov
  2007-05-25  8:31       ` Herbert Xu
  2007-05-25  8:21     ` Herbert Xu
  1 sibling, 2 replies; 15+ messages in thread
From: Sebastian Siewior @ 2007-05-23 10:02 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: linux-crypto

* Evgeniy Polyakov | 2007-05-23 12:03:54 [+0400]:

>On Tue, May 22, 2007 at 05:19:19PM +0200, Sebastian Siewior (linux-crypto@ml.breakpoint.cc) wrote:
>> * Evgeniy Polyakov | 2007-05-22 16:58:29 [+0400]:
>> 
>> >Current driver only supports AES ECB encrypt/decrypt, since I do not 
>> >know how to detect operation mode in runtime (a question).
>> Take a look on my skeleton driver (posted just a few seconds ago). It
>> should solve your problem here.
>
>It does not - your code only supposed to work with ecb, since it is what
>was requested during initialization time. This new scheme with templates
>helps alot for ciphers/crypto modes which do not support several
>templates, so I used old scheme with 'cipher' only template, not
>'ecb(cipher)', 'cbc(cipher)' and so on.
Maybe I missed the point. I am confused with your mutex comment anyway
(but later mode).
It is also possible that I interpreted Herbert's code the wrong
way. Let me comment the obvious part of the skeleton code which I thing
you overlooked (If you didn't than I didn't catch up with in the first
place or missed the goal of the async API).

Register 12 struct crypto_alg, each with unique functions for
aes|3des|.. + ecb|cbc|.. + encrypto|decrypt (I agree with you, that you
prefer 4 instead of 12 since most of the attributes are the same).
Now, algo_decrypt_ecb_async() is doing just:
  return enqueue_request(req, algo_request_decrypt);

consider algo_request_decrypt as request_aes_decrypt_ecb. This function
(algo_request_decrypt) only calls blkcipher_crypt(...., HW_DECRYPT_ECB)
which calls the HW directly. You see that way what is requested
(AES+ECB+ENCRYPT).

Instead of calling a function pointer, you could shorten the code by
enqueue_request(..., HW_DECRYPT_ECB) directly and call blkcipher_crypt()
from process_requests_thread() with the correct operation type. However
the encrypt/decrypt process happens in a seperate kthread.

I took a deeper look on your code and it seems to me, that you might
still use the sync API. Your .cra_type is still crypto_blkcipher_type.
Your code might actually be broken because you set up a struct
ablkcipher_alg but the crypto might threat it as struct blkcipher_alg.
Check /proc/crypto, your supported keysizes should be wrong.

>And, btw, do not use mutex in encryption path, it is not supposed to
>sleep in ipsec.
I am aware of that but again: I might be totally wrong. Herbert
introduced a async API. My understanding was that he wants to queue
encrypt+decrypt (not setkey) and process it later in a dedicated thread.
On the other hand: what is async when still evrything happny sync.
He's tcrypt code queues a request, and calls
wait_for_completion_interruptible() so he does sleep and waits until the
cipher finishes (in a seperate kthread). However this is only the case
if crypto_ablkcipher_XXX() returns with -EINPROGRESS or -EBUSY. In case
of 0 he expects a synchron processing. I assume in case of -EBUSY, the
caller has to put the request once again in the queue (at a later time,
probably after a request completed. This looks little dodgy to me,
because it may be the first request).
However, if the caller is requesting a async algo, he knows that he
might go to bed in the meantime.
*I* have to sleep while handling a crypto request over to the hardware.
My understanding of Herbert's async crypto API was a blessing :) In case
of IPsec I am actually thinking how to fix this and not break anything
(in terms of performance and hacky code).

>HIFN supports at least 12 different ciphers/mode (3des, des and aes,
>each one with 4 modes), so it is not a good idea to put them all into
>separated structures, so I rised a question about it.
This might be cool.

>
>> Sebastian
>
>-- 
>	Evgeniy Polyakov
Sebastian

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-23 10:02     ` Sebastian Siewior
@ 2007-05-23 12:30       ` Evgeniy Polyakov
  2007-05-25  8:31       ` Herbert Xu
  1 sibling, 0 replies; 15+ messages in thread
From: Evgeniy Polyakov @ 2007-05-23 12:30 UTC (permalink / raw)
  To: linux-crypto

Hi, Sebastian.

On Wed, May 23, 2007 at 12:02:44PM +0200, Sebastian Siewior (linux-crypto@ml.breakpoint.cc) wrote:
> It is also possible that I interpreted Herbert's code the wrong
> way. Let me comment the obvious part of the skeleton code which I thing
> you overlooked (If you didn't than I didn't catch up with in the first
> place or missed the goal of the async API).
> 
> Register 12 struct crypto_alg, each with unique functions for
> aes|3des|.. + ecb|cbc|.. + encrypto|decrypt (I agree with you, that you
> prefer 4 instead of 12 since most of the attributes are the same).
> Now, algo_decrypt_ecb_async() is doing just:
>   return enqueue_request(req, algo_request_decrypt);

That is what I want to avoid.

> consider algo_request_decrypt as request_aes_decrypt_ecb. This function
> (algo_request_decrypt) only calls blkcipher_crypt(...., HW_DECRYPT_ECB)
> which calls the HW directly. You see that way what is requested
> (AES+ECB+ENCRYPT).
> 
> Instead of calling a function pointer, you could shorten the code by
> enqueue_request(..., HW_DECRYPT_ECB) directly and call blkcipher_crypt()
> from process_requests_thread() with the correct operation type. However
> the encrypt/decrypt process happens in a seperate kthread.

My point is not to introduce a lot of structures and functions, which
are essentially (read: exactly) the same, but instead get crypto processing 
mode from the tfm/whatever structure. Your code only registers ecb, but to 
fully support crypto capabilities for given device the same structure (but 
with different functions and template strings) must be registered for every
device for every cipher/digest and every mode and probably even for
every key size, but I'm not sure about the latter though.
That is what I want to avoid.

> I took a deeper look on your code and it seems to me, that you might
> still use the sync API. Your .cra_type is still crypto_blkcipher_type.
> Your code might actually be broken because you set up a struct
> ablkcipher_alg but the crypto might threat it as struct blkcipher_alg.
> Check /proc/crypto, your supported keysizes should be wrong.

Thanks for pointing, that must be ablkcpiher_type, I've fixed that typo.

> >And, btw, do not use mutex in encryption path, it is not supposed to
> >sleep in ipsec.
> I am aware of that but again: I might be totally wrong. Herbert
> introduced a async API. My understanding was that he wants to queue
> encrypt+decrypt (not setkey) and process it later in a dedicated thread.
> On the other hand: what is async when still evrything happny sync.

Ah, then your code only works with dedicated thread, which is not needed
for true hardware, which works in interrupt mode, since register setup
is quite fast compared to rescheduling to dedicated thread, so it is not
needed, and instead registers setup is performed in ->encrypt/->decrypt
callbacks and completion function is called (with disabled interrupts)
from interrupt handler.

> *I* have to sleep while handling a crypto request over to the hardware.

No, you have not. Check acrypto sources and how it is implemented for
example for hifn driver and ipsec stack.

> My understanding of Herbert's async crypto API was a blessing :) In case

I'm about to disagree, last time we talked with Herbert about async 
cryptoapi design we failed to find a solution, suitable for both points of 
view. :-)

> of IPsec I am actually thinking how to fix this and not break anything
> (in terms of performance and hacky code).

In case of async IPsec you might check acrypto sources, which have async
ipsec support quite for a while, but it is hacky indeed - I needed to
heavily change ipsec processing code both in input and output to make it
possible to work without any sleeps and with 'interrupted' crypto processing. 
It works not slower than native code, although I only did esp4.

> Sebastian

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-22 12:58 [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi Evgeniy Polyakov
  2007-05-22 15:19 ` Sebastian Siewior
@ 2007-05-25  8:14 ` Herbert Xu
  2007-05-25  8:55   ` Evgeniy Polyakov
  1 sibling, 1 reply; 15+ messages in thread
From: Herbert Xu @ 2007-05-25  8:14 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: linux-crypto

HI Evgeniy:

On Tue, May 22, 2007 at 12:58:29PM +0000, Evgeniy Polyakov wrote:
> 
> This is preliminary driver for HIFN 795x crypto accelerator chip.

Thanks for the working on this!

> Likely it is not even a request for testing, since I see at least one
> problem with current approach: what to do when crypto hardware queue is
> full and no new packets can arrive? Current code just returns an error
> -EINVAL if there is real error and -EBUSY if queue is full.

Each device should have a crypto_queue dedicated to it to handle this
situation.  So when the hardware queue is full you start filling up
the software crypto_queue using crypto_enqueue_request.  When that
becomes full the caller either gets an error or it can enqueue one
last request and then block by setting the CRYPTO_TFM_REQ_MAY_BACKLOG
flag.  In either case it'll get back a -EBUSY error.

When your hardware queue is drained you should try to refill it by
calling crypto_dequeue_request.

> Due to problems with interrupt storms and possible adapter freeze
> (sorry, but HIFN spec I have really sucks, so likely it is programming
> error, but who knows) I added special watchdog, which fires if after
> predefined timeout sessions which are supposed to be completed are not.
> In that case callback is invoked with -EBUSY error.

Yes we do need watchdogs for all hardware devices to handle situations
like this.  Feel free to add helpers to the API to aid drivers in
handling this.

> This driver supports old-style crypto_alg with "aes" string only, and I
> would like to rise a discussion of the needs to support several
> structures for cbc(aes), ecb(aes) and so on, since some hardware
> supports plenty of modes, and allocating set of structures for each
> hardware adapter found in the system would be an overkill.

It was an explicit design decision to avoid using bitmasks.  Just as
we use strings as the unique key to identify algorithms rather than
integers as that provides the freedom for expansion, we now use strings
to describe cipher modes rather than bitmasks.  There are numerous
new cipher modes in recent years and there is no way we're going back
to describing these using bitmasks again.

As to allocating an object for each algorithm that you support being
an overkill, are you concerned about the data size?  That shouldn't
be an issue because you'd only have one such object per algorithm
per device and they really aren't that big.

If you're worried about duplicate code then we can probably look at
providing helpers to eliminate as much of that as possible.  Have a
look at padlock/s390 for example.  They handle these in a fairly
sane way.

> Current driver only supports AES ECB encrypt/decrypt, since I do not 
> know how to detect operation mode in runtime (a question).

For each incoming request you have an associated tfm object which has
a link to the algorithm object.  The algorithm object provides you
the info you need to know which algorithm to use and the tfm object
provides the session-specific information which would be the key for
ciphers.

> Another issue unknown issue is a possibility to call setkey() the same
> time encrypt/decrypt is called. As far as I can see it can not be done,
> but I may be wrong, if so, small changes are needed in hifn_setkey
> (mainly operation must be done under dev->lock).

Indeed users should not call setkey while there are still outstanding
operations.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-23  8:03   ` Evgeniy Polyakov
  2007-05-23 10:02     ` Sebastian Siewior
@ 2007-05-25  8:21     ` Herbert Xu
  2007-05-25  9:00       ` Evgeniy Polyakov
  1 sibling, 1 reply; 15+ messages in thread
From: Herbert Xu @ 2007-05-25  8:21 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: linux-crypto, linux-crypto

Evgeniy Polyakov <johnpol@2ka.mipt.ru> wrote:
> 
> It does not - your code only supposed to work with ecb, since it is what
> was requested during initialization time. This new scheme with templates
> helps alot for ciphers/crypto modes which do not support several
> templates, so I used old scheme with 'cipher' only template, not
> 'ecb(cipher)', 'cbc(cipher)' and so on.

I just had a look and your driver should use the cra_name of "ecb(aes)"
since ECB is what it implements.  The cra_driver_name can be ecb-aes-hifn
(if there can only be one hifn device, otherwise make it something like
ecb-aes-hifn-<devname> or ecb-aes-<devname>).

Your priority should also be above 200 which is where assembly-optimised
software algorithms are registered at by default.  I suggest 300.

> HIFN supports at least 12 different ciphers/mode (3des, des and aes,
> each one with 4 modes), so it is not a good idea to put them all into
> separated structures, so I rised a question about it.

Well it is a trade-off of a bit of work here vs. the ability to better
support new cipher modes that arise.  Could we perhaps use macro helpers
so that you don't have to type out each one by hand?

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-23 10:02     ` Sebastian Siewior
  2007-05-23 12:30       ` Evgeniy Polyakov
@ 2007-05-25  8:31       ` Herbert Xu
  1 sibling, 0 replies; 15+ messages in thread
From: Herbert Xu @ 2007-05-25  8:31 UTC (permalink / raw)
  To: Sebastian Siewior; +Cc: johnpol, linux-crypto

Sebastian Siewior <linux-crypto@ml.breakpoint.cc> wrote:
 
>>And, btw, do not use mutex in encryption path, it is not supposed to
>>sleep in ipsec.
> I am aware of that but again: I might be totally wrong. Herbert
> introduced a async API. My understanding was that he wants to queue
> encrypt+decrypt (not setkey) and process it later in a dedicated thread.
> On the other hand: what is async when still evrything happny sync.
> He's tcrypt code queues a request, and calls

Evgeniy is absolutely right here.

While it is true that the whole point of async is so that you can go
away and do something else while the crypto operation is in place,
it does not mean that the crypto driver itself can sleep when handling
a request.

For example, the caller might be IPsec which runs in an unsleepable BH
context.  It wants to queue the packet for processing and have the
crypto layer call it back when it's done.  There is no explicit sleeping
involved here.

So if you need locking you should use something like a spin lock.

> wait_for_completion_interruptible() so he does sleep and waits until the
> cipher finishes (in a seperate kthread). However this is only the case
> if crypto_ablkcipher_XXX() returns with -EINPROGRESS or -EBUSY. In case

Of course the caller can sleep if it is safe.  tcrypt always runs in
process context which is why it can afford to sleep here.  IPsec on
the other hand would not be able to do that.

>>HIFN supports at least 12 different ciphers/mode (3des, des and aes,
>>each one with 4 modes), so it is not a good idea to put them all into
>>separated structures, so I rised a question about it.
> This might be cool.

I'm all for helpers to reduce typing here :)

However, we really do need separate objects for each block cipher
since from the crypto API's point of view they're totally unrelated.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-25  8:14 ` Herbert Xu
@ 2007-05-25  8:55   ` Evgeniy Polyakov
  2007-05-25  9:35     ` Sebastian Siewior
  2007-05-25 11:01     ` Herbert Xu
  0 siblings, 2 replies; 15+ messages in thread
From: Evgeniy Polyakov @ 2007-05-25  8:55 UTC (permalink / raw)
  To: Herbert Xu; +Cc: linux-crypto

Hi Herbert.

On Fri, May 25, 2007 at 06:14:17PM +1000, Herbert Xu (herbert@gondor.apana.org.au) wrote:
> > Likely it is not even a request for testing, since I see at least one
> > problem with current approach: what to do when crypto hardware queue is
> > full and no new packets can arrive? Current code just returns an error
> > -EINVAL if there is real error and -EBUSY if queue is full.
> 
> Each device should have a crypto_queue dedicated to it to handle this
> situation.  So when the hardware queue is full you start filling up
> the software crypto_queue using crypto_enqueue_request.  When that
> becomes full the caller either gets an error or it can enqueue one
> last request and then block by setting the CRYPTO_TFM_REQ_MAY_BACKLOG
> flag.  In either case it'll get back a -EBUSY error.
> 
> When your hardware queue is drained you should try to refill it by
> calling crypto_dequeue_request.

Well, it is just hardware queue increase, so essentially for correct
work it should return -EBUSY in case driver does not accept requests
anymore (no matter if they are pushed into hardware or linked into
backlog queue). According to sleeping with CRYPTO_TFM_REQ_MAY_BACKLOG -
what about ipsec, where it is not allowed to sleep?
dm-crypt as the only user of async cryptoapi is allowed to sleep, but
I'm sure eventually ipsec will be converted (or heavily
hacked/uglymoroned like I did in acrypto) into async mode too, but
netowrk processing does not sleep at all. I do not think creating
dedicated thread for ipsec processing is a good idea, but who knows...

> > Due to problems with interrupt storms and possible adapter freeze
> > (sorry, but HIFN spec I have really sucks, so likely it is programming
> > error, but who knows) I added special watchdog, which fires if after
> > predefined timeout sessions which are supposed to be completed are not.
> > In that case callback is invoked with -EBUSY error.
> 
> Yes we do need watchdogs for all hardware devices to handle situations
> like this.  Feel free to add helpers to the API to aid drivers in
> handling this.

It is doable and likely needs to be pushed into generic code, but I will
postpone it until this driver is ready.

> > This driver supports old-style crypto_alg with "aes" string only, and I
> > would like to rise a discussion of the needs to support several
> > structures for cbc(aes), ecb(aes) and so on, since some hardware
> > supports plenty of modes, and allocating set of structures for each
> > hardware adapter found in the system would be an overkill.
> 
> It was an explicit design decision to avoid using bitmasks.  Just as
> we use strings as the unique key to identify algorithms rather than
> integers as that provides the freedom for expansion, we now use strings
> to describe cipher modes rather than bitmasks.  There are numerous
> new cipher modes in recent years and there is no way we're going back
> to describing these using bitmasks again.
> 
> As to allocating an object for each algorithm that you support being
> an overkill, are you concerned about the data size?  That shouldn't
> be an issue because you'd only have one such object per algorithm
> per device and they really aren't that big.
> 
> If you're worried about duplicate code then we can probably look at
> providing helpers to eliminate as much of that as possible.  Have a
> look at padlock/s390 for example.  They handle these in a fairly
> sane way.

I mostly worry about allocation/freeing/init code amount, memory 
overhead is not that big, since amount of devices is limited.
One says lazyness is a progress engine, but somtimes I do not agree :)

> > Current driver only supports AES ECB encrypt/decrypt, since I do not 
> > know how to detect operation mode in runtime (a question).
> 
> For each incoming request you have an associated tfm object which has
> a link to the algorithm object.  The algorithm object provides you
> the info you need to know which algorithm to use and the tfm object
> provides the session-specific information which would be the key for
> ciphers.

That is how crypto processing is being done, but there is no information
about how blocks are managed, i.e. were they chained into cbc or just
one-by-one in ecb. As far as I can see, there is no such knowledge until
algorithm was registered with new syle scheme with ecb(algo)/cbc(algo)
strings and so on, in that case there are different strings and/or
function pointers.

> > Another issue unknown issue is a possibility to call setkey() the same
> > time encrypt/decrypt is called. As far as I can see it can not be done,
> > but I may be wrong, if so, small changes are needed in hifn_setkey
> > (mainly operation must be done under dev->lock).
> 
> Indeed users should not call setkey while there are still outstanding
> operations.

Hmm, in that case all setkey operations must be protected against
appropriate crypto processing ones, but I do not see if it is ever done
in any driver. Probably they rely on higher layer not to call setkey
simultaneously with encrypt/decrypt (this assumption correct for both
ipsec and dm-crypt), but what if another kernel module will use them?

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-25  8:21     ` Herbert Xu
@ 2007-05-25  9:00       ` Evgeniy Polyakov
  2007-05-25 11:03         ` Herbert Xu
  0 siblings, 1 reply; 15+ messages in thread
From: Evgeniy Polyakov @ 2007-05-25  9:00 UTC (permalink / raw)
  To: Herbert Xu; +Cc: linux-crypto, linux-crypto

On Fri, May 25, 2007 at 06:21:25PM +1000, Herbert Xu (herbert@gondor.apana.org.au) wrote:
> Evgeniy Polyakov <johnpol@2ka.mipt.ru> wrote:
> > 
> > It does not - your code only supposed to work with ecb, since it is what
> > was requested during initialization time. This new scheme with templates
> > helps alot for ciphers/crypto modes which do not support several
> > templates, so I used old scheme with 'cipher' only template, not
> > 'ecb(cipher)', 'cbc(cipher)' and so on.
> 
> I just had a look and your driver should use the cra_name of "ecb(aes)"
> since ECB is what it implements.  The cra_driver_name can be ecb-aes-hifn
> (if there can only be one hifn device, otherwise make it something like
> ecb-aes-hifn-<devname> or ecb-aes-<devname>).

I implemented one mode only to show that it would be good to have one
structure allocated and use different crypto modes with only the same
callbacks. But since there is no way to determine for which mode
encryption is performed until 'mode(cipher)' string is used, I have to
allocate and initialize all supported modes and register them
saparately.

> Your priority should also be above 200 which is where assembly-optimised
> software algorithms are registered at by default.  I suggest 300.

Ok.

> > HIFN supports at least 12 different ciphers/mode (3des, des and aes,
> > each one with 4 modes), so it is not a good idea to put them all into
> > separated structures, so I rised a question about it.
> 
> Well it is a trade-off of a bit of work here vs. the ability to better
> support new cipher modes that arise.  Could we perhaps use macro helpers
> so that you don't have to type out each one by hand?

I think that would be good to have set of functions like
struct crypto_alg alloc_crypto_alg(char *name, setkey_callback,
encrypt/decrypt callbacks);
I will put them into driver initially for testing purposes, later
we could move them into generic code to be reused.

> Cheers,
> -- 
> Visit Openswan at http://www.openswan.org/
> Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-25  8:55   ` Evgeniy Polyakov
@ 2007-05-25  9:35     ` Sebastian Siewior
  2007-05-25 10:20       ` Evgeniy Polyakov
  2007-05-25 11:01     ` Herbert Xu
  1 sibling, 1 reply; 15+ messages in thread
From: Sebastian Siewior @ 2007-05-25  9:35 UTC (permalink / raw)
  To: linux-crypto

* Evgeniy Polyakov | 2007-05-25 12:55:10 [+0400]:

>Well, it is just hardware queue increase, so essentially for correct
>work it should return -EBUSY in case driver does not accept requests
>anymore (no matter if they are pushed into hardware or linked into
>backlog queue). According to sleeping with CRYPTO_TFM_REQ_MAY_BACKLOG -
>what about ipsec, where it is not allowed to sleep?

Can't you the drop packet than and hope further packets will arrive
slowly or is the packet allready ACKed and you are not allowed to lose
it?

>
>-- 
>	Evgeniy Polyakov
Sebastian

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-25  9:35     ` Sebastian Siewior
@ 2007-05-25 10:20       ` Evgeniy Polyakov
  2007-05-25 11:35         ` Herbert Xu
  0 siblings, 1 reply; 15+ messages in thread
From: Evgeniy Polyakov @ 2007-05-25 10:20 UTC (permalink / raw)
  To: linux-crypto

On Fri, May 25, 2007 at 11:35:32AM +0200, Sebastian Siewior (linux-crypto@ml.breakpoint.cc) wrote:
> * Evgeniy Polyakov | 2007-05-25 12:55:10 [+0400]:
> 
> >Well, it is just hardware queue increase, so essentially for correct
> >work it should return -EBUSY in case driver does not accept requests
> >anymore (no matter if they are pushed into hardware or linked into
> >backlog queue). According to sleeping with CRYPTO_TFM_REQ_MAY_BACKLOG -
> >what about ipsec, where it is not allowed to sleep?
> 
> Can't you the drop packet than and hope further packets will arrive
> slowly or is the packet allready ACKed and you are not allowed to lose
> it?

It is quite normal to lose packets in network stack - NIC's queue is
limited too and hardware can drop packets whatever it likes.
TCP will just retransmit the packet, and no one cares about UDP.

In case of dm-crypt situation is different - first, it sets may-sleep
flag, which basically means that it can not fail. But if it fails, block
io request is completed with -EIO error.
Essentially this will be dropped down to bio_end_io, which does not get
into account error, but checks if bio is uptodate, appropriate bit is
not set when bio is completed with error.

So, dm-crypt will fail and will not try to process that block again, 
if crypto returns error. In acrypto I put a queue length as parameter 
to crypto device (crypto_alg in cryptoapi) structure, and acrypto 
load balancer always selected device which does have a space in the 
queue. I think something similar should be created for cryptoapi, so 
that even if device has higher prio it should not be selected until 
there is a place in its queue. Software implementation has infinite 
queue of course. In such case we do not need to have backlog queue,
which can be overflown too.

> Sebastian

-- 
	Evgeniy Polyakov

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-25  8:55   ` Evgeniy Polyakov
  2007-05-25  9:35     ` Sebastian Siewior
@ 2007-05-25 11:01     ` Herbert Xu
  1 sibling, 0 replies; 15+ messages in thread
From: Herbert Xu @ 2007-05-25 11:01 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: linux-crypto

Hi Evgeniy:

On Fri, May 25, 2007 at 12:55:10PM +0400, Evgeniy Polyakov wrote:
> 
> Well, it is just hardware queue increase, so essentially for correct
> work it should return -EBUSY in case driver does not accept requests
> anymore (no matter if they are pushed into hardware or linked into
> backlog queue). According to sleeping with CRYPTO_TFM_REQ_MAY_BACKLOG -
> what about ipsec, where it is not allowed to sleep?
> dm-crypt as the only user of async cryptoapi is allowed to sleep, but
> I'm sure eventually ipsec will be converted (or heavily
> hacked/uglymoroned like I did in acrypto) into async mode too, but
> netowrk processing does not sleep at all. I do not think creating
> dedicated thread for ipsec processing is a good idea, but who knows...

IPsec simply wouldn't use CRYPTO_TFM_REQ_MAY_BACKLOG.  It would instead
drop the packet if the queue is full.

The CRYPTO_TFM_REQ_MAY_BACKLOG flag is used in situations where you
absolutely must queue at least one request (per tfm) and you're able
to either sleep or otherwise block further requests from being issued.
 
> I mostly worry about allocation/freeing/init code amount, memory 
> overhead is not that big, since amount of devices is limited.
> One says lazyness is a progress engine, but somtimes I do not agree :)

Perhaps I'm not understanding it correctly.  As far as I can see
you should be able to use a single function to serve all these
algorithms, no?

Anyway, let me know if it does turn out to be overly difficult and
we can always change things around.

> > > Current driver only supports AES ECB encrypt/decrypt, since I do not 
> > > know how to detect operation mode in runtime (a question).
> > 
> > For each incoming request you have an associated tfm object which has
> > a link to the algorithm object.  The algorithm object provides you
> > the info you need to know which algorithm to use and the tfm object
> > provides the session-specific information which would be the key for
> > ciphers.
> 
> That is how crypto processing is being done, but there is no information
> about how blocks are managed, i.e. were they chained into cbc or just
> one-by-one in ecb. As far as I can see, there is no such knowledge until
> algorithm was registered with new syle scheme with ecb(algo)/cbc(algo)
> strings and so on, in that case there are different strings and/or
> function pointers.

Have a look at padlock-aes/s390 where they handle pretty much the same
thing.  For HIFN I'd suggest that the entry (i.e., the encrypt/decrypt
hooks that the crypto API calls) function supply the mode and any other
algorithm-specific information before calling a generic function.

For example,

static int hifn_encrypt(int mode, struct ablkcipher_request *req)
{
	...do whatever it does now except for setting the mode...
}

static int hifn_get_key_size(struct ablkcipher_request *req)
{
	struct hifn_device *dev = ablkcipher_request_ctx(req);
	
	switch (dev->current_key_len) {
	case 128:
		return HIFN_CRYPT_CMD_KSZ_128;
	case 192:
		return HIFN_CRYPT_CMD_KSZ_192;
	case 256:
		return HIFN_CRYPT_CMD_KSZ_256;
	}

	BUG();
}

static int hifn_cbc_aes_encrypt(struct ablkcipher_request *req)
{
	return hifn_encrypt(HIFN_CRYPT_CMD_MODE_CBC | HIFN_CRYPT_CMD_ALG_AES |
			    hifn_get_key_size(req), req);
}

static int hifn_ecb_aes_encrypt(struct ablkcipher_request *req)
{
	return hifn_encrypt(HIFN_CRYPT_CMD_MODE_ECB | HIFN_CRYPT_CMD_ALG_AES |
			    hifn_get_key_size(req), req);
}

Right now the size is not specific to the algorithm.  If it were then
you could simply or the approriate bit here too.

> > Indeed users should not call setkey while there are still outstanding
> > operations.
> 
> Hmm, in that case all setkey operations must be protected against
> appropriate crypto processing ones, but I do not see if it is ever done
> in any driver. Probably they rely on higher layer not to call setkey
> simultaneously with encrypt/decrypt (this assumption correct for both
> ipsec and dm-crypt), but what if another kernel module will use them?

What I meant is that the crypto user should *never* invoke setkey while
there are outstanding requests.  So if it does happen it's OK for you
to return corrupted/unexpected output, or just BUG.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-25  9:00       ` Evgeniy Polyakov
@ 2007-05-25 11:03         ` Herbert Xu
  0 siblings, 0 replies; 15+ messages in thread
From: Herbert Xu @ 2007-05-25 11:03 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: linux-crypto, linux-crypto

On Fri, May 25, 2007 at 01:00:05PM +0400, Evgeniy Polyakov wrote:
> 
> I think that would be good to have set of functions like
> struct crypto_alg alloc_crypto_alg(char *name, setkey_callback,
> encrypt/decrypt callbacks);
> I will put them into driver initially for testing purposes, later
> we could move them into generic code to be reused.

I completely agree.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi.
  2007-05-25 10:20       ` Evgeniy Polyakov
@ 2007-05-25 11:35         ` Herbert Xu
  0 siblings, 0 replies; 15+ messages in thread
From: Herbert Xu @ 2007-05-25 11:35 UTC (permalink / raw)
  To: Evgeniy Polyakov; +Cc: linux-crypto

Evgeniy Polyakov <johnpol@2ka.mipt.ru> wrote:
>
> So, dm-crypt will fail and will not try to process that block again, 
> if crypto returns error. In acrypto I put a queue length as parameter 
> to crypto device (crypto_alg in cryptoapi) structure, and acrypto 
> load balancer always selected device which does have a space in the 
> queue. I think something similar should be created for cryptoapi, so 
> that even if device has higher prio it should not be selected until 
> there is a place in its queue. Software implementation has infinite 
> queue of course. In such case we do not need to have backlog queue,
> which can be overflown too.

The way I've solved is using the MAY_BACKLOG flag.  It's basically
an emergency reserve queue of length 1.  So for each tfm object,
you're guaranteed to be able to queue at least one request which
is sufficient.

This reminds me, I need to refresh my dm-crypt patch and repost it.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2007-05-25 11:35 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-05-22 12:58 [1/1] HIFN: preliminary HIFN 795x driver for new async cryptoapi Evgeniy Polyakov
2007-05-22 15:19 ` Sebastian Siewior
2007-05-23  8:03   ` Evgeniy Polyakov
2007-05-23 10:02     ` Sebastian Siewior
2007-05-23 12:30       ` Evgeniy Polyakov
2007-05-25  8:31       ` Herbert Xu
2007-05-25  8:21     ` Herbert Xu
2007-05-25  9:00       ` Evgeniy Polyakov
2007-05-25 11:03         ` Herbert Xu
2007-05-25  8:14 ` Herbert Xu
2007-05-25  8:55   ` Evgeniy Polyakov
2007-05-25  9:35     ` Sebastian Siewior
2007-05-25 10:20       ` Evgeniy Polyakov
2007-05-25 11:35         ` Herbert Xu
2007-05-25 11:01     ` Herbert Xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox