From: "Huang, Ying" <ying.huang@intel.com>
To: Herbert Xu <herbert@gondor.apana.org.au>,
"Adam J. Richter" <adam@yggdrasil.com>,
Alexander Kjeldaas <astor@fast.no>,
Sebastian Siewior <linux-crypto@ml.breakpoint.cc>,
akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, linux-crypto@vger.kernel.org
Subject: [PATCH -mm crypto] AES: x86_64 asm implementation optimization
Date: Wed, 09 Apr 2008 14:41:02 +0800 [thread overview]
Message-ID: <1207723262.18313.37.camel@caritas-dev.intel.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 5847 bytes --]
This patch improves the performance of the AES x86-64 implementation. The
average improvement is more than 6.3% and the maximum improvement is
more than 10.2% on an Intel Core 2 CPU. The performance improvement is
gained via the following methods:
- Two additional temporary registers are used to hold a subset of
the state, so that dependencies between instructions are reduced.
- The expanded key is loaded via two 64-bit loads instead of four 32-bit loads.
This patch is based on 2.6.25-rc8-mm1.
The attached files contain the test data obtained via: modprobe tcrypt mode=200
- dmesg_1_core-stockn: stock kernel data
- dmesg_1_core-op4n: patched kernel data
- percent.txt: (time_patched - time_stock) / time_stock * 100
Signed-off-by: Huang Ying <ying.huang@intel.com>
---
arch/x86/crypto/aes-x86_64-asm_64.S | 101 ++++++++++++++++++++----------------
include/crypto/aes.h | 1
2 files changed, 58 insertions(+), 44 deletions(-)
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -46,70 +46,81 @@
#define R7 %rbp
#define R7E %ebp
#define R8 %r8
+#define R8E %r8d
#define R9 %r9
+#define R9E %r9d
#define R10 %r10
#define R11 %r11
+#define R12 %r12
+#define R12E %r12d
+#define R16 %rsp
#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
.global FUNC; \
.type FUNC,@function; \
.align 8; \
-FUNC: movq r1,r2; \
- movq r3,r4; \
- leaq BASE+KEY+48+4(r8),r9; \
- movq r10,r11; \
- movl (r7),r5 ## E; \
- movl 4(r7),r1 ## E; \
- movl 8(r7),r6 ## E; \
- movl 12(r7),r7 ## E; \
- movl BASE+0(r8),r10 ## E; \
- xorl -48(r9),r5 ## E; \
- xorl -44(r9),r1 ## E; \
- xorl -40(r9),r6 ## E; \
- xorl -36(r9),r7 ## E; \
- cmpl $24,r10 ## E; \
+FUNC: subq $24,r11; \
+ movl (r6),r4 ## E; \
+ leaq BASE+KEY+48+8(r7),r8; \
+ movq r1,(r11); \
+ movq r9,r10; \
+ movl 4(r6),r1 ## E; \
+ movq r2,8(r11); \
+ movl 8(r6),r5 ## E; \
+ movq r3,16(r11); \
+ movl 12(r6),r6 ## E; \
+ movl BASE+0(r7),r9 ## E; \
+ xorl -48(r8),r4 ## E; \
+ xorl -44(r8),r1 ## E; \
+ xorl -40(r8),r5 ## E; \
+ xorl -36(r8),r6 ## E; \
+ cmpl $24,r9 ## E; \
jb B128; \
- leaq 32(r9),r9; \
+ leaq 32(r8),r8; \
je B192; \
- leaq 32(r9),r9;
+ leaq 32(r8),r8;
#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
- movq r1,r2; \
- movq r3,r4; \
- movl r5 ## E,(r9); \
- movl r6 ## E,4(r9); \
- movl r7 ## E,8(r9); \
- movl r8 ## E,12(r9); \
+ movq (r9),r1; \
+ movl r4 ## E,(r8); \
+ movq 8(r9),r2; \
+ movl r5 ## E,4(r8); \
+ movq 16(r9),r3; \
+ movl r6 ## E,8(r8); \
+ addq $24,r9; \
+ movl r7 ## E,12(r8); \
ret;
-#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
+#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,ra,rb,rc,rd) \
movzbl r2 ## H,r5 ## E; \
movzbl r2 ## L,r6 ## E; \
+ movl r4 ## E,r8 ## E; \
+ shrl $16,r4 ## E; \
movl TAB+1024(,r5,4),r5 ## E;\
- movw r4 ## X,r2 ## X; \
movl TAB(,r6,4),r6 ## E; \
- roll $16,r2 ## E; \
- shrl $16,r4 ## E; \
movzbl r4 ## H,r7 ## E; \
movzbl r4 ## L,r4 ## E; \
- xorl OFFSET(r8),ra ## E; \
- xorl OFFSET+4(r8),rb ## E; \
+ movq OFFSET(r11),r10; \
+ shrl $16,r2 ## E; \
+ movl r3 ## E,r9 ## E; \
xorl TAB+3072(,r7,4),r5 ## E;\
xorl TAB+2048(,r4,4),r6 ## E;\
- movzbl r1 ## L,r7 ## E; \
movzbl r1 ## H,r4 ## E; \
- movl TAB+1024(,r4,4),r4 ## E;\
- movw r3 ## X,r1 ## X; \
- roll $16,r1 ## E; \
+ movzbl r1 ## L,r7 ## E; \
shrl $16,r3 ## E; \
+ movl TAB+1024(,r4,4),r4 ## E;\
xorl TAB(,r7,4),r5 ## E; \
+ shrl $16,r1 ## E; \
movzbl r3 ## H,r7 ## E; \
movzbl r3 ## L,r3 ## E; \
xorl TAB+3072(,r7,4),r4 ## E;\
xorl TAB+2048(,r3,4),r5 ## E;\
movzbl r1 ## H,r7 ## E; \
movzbl r1 ## L,r3 ## E; \
- shrl $16,r1 ## E; \
+ xorl r10 ## E,ra ## E; \
+ movl r9 ## E,r1 ## E; \
+ movq OFFSET+8(r11),r9; \
+ shrq $32,r10; \
xorl TAB+3072(,r7,4),r6 ## E;\
movl TAB+2048(,r3,4),r3 ## E;\
movzbl r1 ## H,r7 ## E; \
@@ -118,38 +129,40 @@ FUNC: movq r1,r2; \
xorl TAB(,r1,4),r3 ## E; \
movzbl r2 ## H,r1 ## E; \
movzbl r2 ## L,r7 ## E; \
- shrl $16,r2 ## E; \
+ xorl r9 ## E, rc ## E; \
+ movl r8 ## E,r2 ## E; \
+ shrq $32,r9; \
+ xorl r10 ## E,rb ## E; \
xorl TAB+3072(,r1,4),r3 ## E;\
xorl TAB+2048(,r7,4),r4 ## E;\
movzbl r2 ## H,r1 ## E; \
+ xorl r9 ## E, rd ## E; \
movzbl r2 ## L,r2 ## E; \
- xorl OFFSET+8(r8),rc ## E; \
- xorl OFFSET+12(r8),rd ## E; \
- xorl TAB+1024(,r1,4),r3 ## E;\
- xorl TAB(,r2,4),r4 ## E;
+ xorl TAB(,r2,4),r4 ## E; \
+ xorl TAB+1024(,r1,4),r3 ## E;
#define move_regs(r1,r2,r3,r4) \
movl r3 ## E,r1 ## E; \
movl r4 ## E,r2 ## E;
#define entry(FUNC,KEY,B128,B192) \
- prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
+ prologue(FUNC,KEY,B128,B192,R2,R7,R12,R1,R3,R4,R6,R10,R5,R11,R16)
-#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
+#define return epilogue(R2,R7,R12,R5,R6,R3,R4,R11,R16)
#define encrypt_round(TAB,OFFSET) \
- round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
+ round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R8,R9,R12,R10,R5,R6,R3,R4) \
move_regs(R1,R2,R5,R6)
#define encrypt_final(TAB,OFFSET) \
- round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
+ round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R8,R9,R12,R10,R5,R6,R3,R4)
#define decrypt_round(TAB,OFFSET) \
- round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
+ round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R8,R9,R12,R10,R5,R6,R3,R4) \
move_regs(R1,R2,R5,R6)
#define decrypt_final(TAB,OFFSET) \
- round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
+ round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R8,R9,R12,R10,R5,R6,R3,R4)
/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -19,6 +19,7 @@
struct crypto_aes_ctx {
u32 key_length;
+ u32 _pad1;
u32 key_enc[AES_MAX_KEYLENGTH_U32];
u32 key_dec[AES_MAX_KEYLENGTH_U32];
};
[-- Attachment #2: dmesg_1_core-stockn --]
[-- Type: text/plain, Size: 9946 bytes --]
e1000: eth2: e1000_watchdog: 10/100 speed: disabling TSO
testing speed of ecb(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 768 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1202 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3968 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 15065 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 119202 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 552 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1362 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4655 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17731 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 141618 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 593 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1522 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5251 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 20262 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 160605 cycles (8192 bytes)
testing speed of ecb(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 573 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1226 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3984 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 14999 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 118126 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 580 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1405 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4636 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17604 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 140289 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 619 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1551 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5297 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 20286 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 160281 cycles (8192 bytes)
testing speed of cbc(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 649 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1378 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4333 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 16113 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 126978 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 687 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1550 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 5002 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 18849 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 150723 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 722 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1713 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5670 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 21587 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 170571 cycles (8192 bytes)
testing speed of cbc(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 770 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1501 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4484 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 16368 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 128557 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 811 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1678 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 5160 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 19217 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 151977 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 848 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1843 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5840 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 21781 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 170436 cycles (8192 bytes)
testing speed of lrw(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 745 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1525 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4620 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16954 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 132816 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 790 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1696 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 5301 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 19672 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 156073 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 833 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1870 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5971 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 22368 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 176158 cycles (8192 bytes)
testing speed of lrw(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 742 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1528 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4617 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16949 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 132822 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 778 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1701 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 5291 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 19660 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 155871 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 824 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1864 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5978 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 22370 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 176247 cycles (8192 bytes)
testing speed of xts(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 770 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1498 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4486 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16456 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 128552 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 840 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1721 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 5195 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 19166 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 150278 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 921 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1917 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5916 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 21977 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 172153 cycles (8192 bytes)
testing speed of xts(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 780 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1507 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4486 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16455 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 128540 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 853 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1718 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 5223 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 19183 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 150166 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 928 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1925 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5942 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 21950 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 172112 cycles (8192 bytes)
[-- Attachment #3: dmesg_1_core-op4n --]
[-- Type: text/plain, Size: 9946 bytes --]
e1000: eth2: e1000_watchdog: 10/100 speed: disabling TSO
testing speed of ecb(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 511 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1153 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3717 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 14003 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 110386 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 529 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1300 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4344 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 16576 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 132421 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 568 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1455 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 4969 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 18983 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 151159 cycles (8192 bytes)
testing speed of ecb(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 588 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1140 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3650 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 13721 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 108180 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 554 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1301 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4267 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 16175 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 129410 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 592 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1445 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 4847 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 18501 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 146061 cycles (8192 bytes)
testing speed of cbc(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 637 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1326 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4086 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 15168 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 119998 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 663 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1478 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4730 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17692 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 141461 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 702 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1628 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5321 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 20120 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 159425 cycles (8192 bytes)
testing speed of cbc(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 741 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1422 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4136 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 14971 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 117321 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 756 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1551 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4728 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17419 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 138293 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 810 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1690 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5369 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 19844 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 156878 cycles (8192 bytes)
testing speed of lrw(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 732 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1459 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4350 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 15880 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 124042 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 768 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1639 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 4945 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 18299 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 145070 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 812 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1779 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5580 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 20790 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 163517 cycles (8192 bytes)
testing speed of lrw(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 727 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1433 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4231 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 15406 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 120449 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 762 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1601 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 4823 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 17750 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 140575 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 794 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1725 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5419 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 20121 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 158320 cycles (8192 bytes)
testing speed of xts(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 731 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1432 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4254 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 15536 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 121465 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 797 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1626 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 4890 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 18007 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 140970 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 867 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1823 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5551 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 20474 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 160336 cycles (8192 bytes)
testing speed of xts(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 736 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1412 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4162 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 15168 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 118542 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 803 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1602 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 4773 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 17577 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 137579 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 867 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1773 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5405 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 19925 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 155815 cycles (8192 bytes)
[-- Attachment #4: percent.txt --]
[-- Type: text/plain, Size: 2570 bytes --]
ecb1_128_16 -33.46
ecb1_128_64 -4.08
ecb1_128_256 -6.33
ecb1_128_1024 -7.05
ecb1_128_8192 -7.40
ecb1_192_16 -4.17
ecb1_192_64 -4.55
ecb1_192_256 -6.68
ecb1_192_1024 -6.51
ecb1_192_8192 -6.49
ecb1_256_16 -4.22
ecb1_256_64 -4.40
ecb1_256_256 -5.37
ecb1_256_1024 -6.31
ecb1_256_8192 -5.88
ecb0_128_16 2.62
ecb0_128_64 -7.01
ecb0_128_256 -8.38
ecb0_128_1024 -8.52
ecb0_128_8192 -8.42
ecb0_192_16 -4.48
ecb0_192_64 -7.40
ecb0_192_256 -7.96
ecb0_192_1024 -8.12
ecb0_192_8192 -7.75
ecb0_256_16 -4.36
ecb0_256_64 -6.83
ecb0_256_256 -8.50
ecb0_256_1024 -8.80
ecb0_256_8192 -8.87
cbc1_128_16 -1.85
cbc1_128_64 -3.77
cbc1_128_256 -5.70
cbc1_128_1024 -5.86
cbc1_128_8192 -5.50
cbc1_192_16 -3.49
cbc1_192_64 -4.65
cbc1_192_256 -5.44
cbc1_192_1024 -6.14
cbc1_192_8192 -6.15
cbc1_256_16 -2.77
cbc1_256_64 -4.96
cbc1_256_256 -6.16
cbc1_256_1024 -6.80
cbc1_256_8192 -6.53
cbc0_128_16 -3.77
cbc0_128_64 -5.26
cbc0_128_256 -7.76
cbc0_128_1024 -8.53
cbc0_128_8192 -8.74
cbc0_192_16 -6.78
cbc0_192_64 -7.57
cbc0_192_256 -8.37
cbc0_192_1024 -9.36
cbc0_192_8192 -9.00
cbc0_256_16 -4.48
cbc0_256_64 -8.30
cbc0_256_256 -8.07
cbc0_256_1024 -8.89
cbc0_256_8192 -7.95
lrw1_256_16 -1.74
lrw1_256_64 -4.33
lrw1_256_256 -5.84
lrw1_256_1024 -6.33
lrw1_256_8192 -6.61
lrw1_320_16 -2.78
lrw1_320_64 -3.36
lrw1_320_256 -6.72
lrw1_320_1024 -6.98
lrw1_320_8192 -7.05
lrw1_384_16 -2.52
lrw1_384_64 -4.87
lrw1_384_256 -6.55
lrw1_384_1024 -7.05
lrw1_384_8192 -7.18
lrw0_256_16 -2.02
lrw0_256_64 -6.22
lrw0_256_256 -8.36
lrw0_256_1024 -9.10
lrw0_256_8192 -9.32
lrw0_320_16 -2.06
lrw0_320_64 -5.88
lrw0_320_256 -8.85
lrw0_320_1024 -9.72
lrw0_320_8192 -9.81
lrw0_384_16 -3.64
lrw0_384_64 -7.46
lrw0_384_256 -9.35
lrw0_384_1024 -10.05
lrw0_384_8192 -10.17
xts1_256_16 -5.06
xts1_256_64 -4.41
xts1_256_256 -5.17
xts1_256_1024 -5.59
xts1_256_8192 -5.51
xts1_384_16 -5.12
xts1_384_64 -5.52
xts1_384_256 -5.87
xts1_384_1024 -6.05
xts1_384_8192 -6.19
xts1_512_16 -5.86
xts1_512_64 -4.90
xts1_512_256 -6.17
xts1_512_1024 -6.84
xts1_512_8192 -6.86
xts0_256_16 -5.64
xts0_256_64 -6.30
xts0_256_256 -7.22
xts0_256_1024 -7.82
xts0_256_8192 -7.78
xts0_384_16 -5.86
xts0_384_64 -6.75
xts0_384_256 -8.62
xts0_384_1024 -8.37
xts0_384_8192 -8.38
xts0_512_16 -6.57
xts0_512_64 -7.90
xts0_512_256 -9.04
xts0_512_1024 -9.23
xts0_512_8192 -9.47
average: -6.64
min: -33.46
max: 2.62
WARNING: multiple messages have this Message-ID (diff)
From: "Huang, Ying" <ying.huang@intel.com>
To: Herbert Xu <herbert@gondor.apana.org.au>,
"Adam J. Richter" <adam@yggdrasil.com>,
Alexander Kjeldaas <astor@fast.no>,
Sebastian Siewior <linux-crypto@ml.breakpoint.cc>,
akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, linux-crypto@vger.kernel.org
Subject: [PATCH -mm crypto] AES: x86_64 asm implementation optimization
Date: Wed, 09 Apr 2008 14:41:02 +0800 [thread overview]
Message-ID: <1207723262.18313.37.camel@caritas-dev.intel.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 5847 bytes --]
This patch improves the performance of the AES x86-64 implementation. The
average improvement is more than 6.3% and the maximum improvement is
more than 10.2% on an Intel Core 2 CPU. The performance improvement is
gained via the following methods:
- Two additional temporary registers are used to hold a subset of
the state, so that dependencies between instructions are reduced.
- The expanded key is loaded via two 64-bit loads instead of four 32-bit loads.
This patch is based on 2.6.25-rc8-mm1.
The attached files contain the test data obtained via: modprobe tcrypt mode=200
- dmesg_1_core-stockn: stock kernel data
- dmesg_1_core-op4n: patched kernel data
- percent.txt: (time_patched - time_stock) / time_stock * 100
Signed-off-by: Huang Ying <ying.huang@intel.com>
---
arch/x86/crypto/aes-x86_64-asm_64.S | 101 ++++++++++++++++++++----------------
include/crypto/aes.h | 1
2 files changed, 58 insertions(+), 44 deletions(-)
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -46,70 +46,81 @@
#define R7 %rbp
#define R7E %ebp
#define R8 %r8
+#define R8E %r8d
#define R9 %r9
+#define R9E %r9d
#define R10 %r10
#define R11 %r11
+#define R12 %r12
+#define R12E %r12d
+#define R16 %rsp
#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
.global FUNC; \
.type FUNC,@function; \
.align 8; \
-FUNC: movq r1,r2; \
- movq r3,r4; \
- leaq BASE+KEY+48+4(r8),r9; \
- movq r10,r11; \
- movl (r7),r5 ## E; \
- movl 4(r7),r1 ## E; \
- movl 8(r7),r6 ## E; \
- movl 12(r7),r7 ## E; \
- movl BASE+0(r8),r10 ## E; \
- xorl -48(r9),r5 ## E; \
- xorl -44(r9),r1 ## E; \
- xorl -40(r9),r6 ## E; \
- xorl -36(r9),r7 ## E; \
- cmpl $24,r10 ## E; \
+FUNC: subq $24,r11; \
+ movl (r6),r4 ## E; \
+ leaq BASE+KEY+48+8(r7),r8; \
+ movq r1,(r11); \
+ movq r9,r10; \
+ movl 4(r6),r1 ## E; \
+ movq r2,8(r11); \
+ movl 8(r6),r5 ## E; \
+ movq r3,16(r11); \
+ movl 12(r6),r6 ## E; \
+ movl BASE+0(r7),r9 ## E; \
+ xorl -48(r8),r4 ## E; \
+ xorl -44(r8),r1 ## E; \
+ xorl -40(r8),r5 ## E; \
+ xorl -36(r8),r6 ## E; \
+ cmpl $24,r9 ## E; \
jb B128; \
- leaq 32(r9),r9; \
+ leaq 32(r8),r8; \
je B192; \
- leaq 32(r9),r9;
+ leaq 32(r8),r8;
#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
- movq r1,r2; \
- movq r3,r4; \
- movl r5 ## E,(r9); \
- movl r6 ## E,4(r9); \
- movl r7 ## E,8(r9); \
- movl r8 ## E,12(r9); \
+ movq (r9),r1; \
+ movl r4 ## E,(r8); \
+ movq 8(r9),r2; \
+ movl r5 ## E,4(r8); \
+ movq 16(r9),r3; \
+ movl r6 ## E,8(r8); \
+ addq $24,r9; \
+ movl r7 ## E,12(r8); \
ret;
-#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
+#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,ra,rb,rc,rd) \
movzbl r2 ## H,r5 ## E; \
movzbl r2 ## L,r6 ## E; \
+ movl r4 ## E,r8 ## E; \
+ shrl $16,r4 ## E; \
movl TAB+1024(,r5,4),r5 ## E;\
- movw r4 ## X,r2 ## X; \
movl TAB(,r6,4),r6 ## E; \
- roll $16,r2 ## E; \
- shrl $16,r4 ## E; \
movzbl r4 ## H,r7 ## E; \
movzbl r4 ## L,r4 ## E; \
- xorl OFFSET(r8),ra ## E; \
- xorl OFFSET+4(r8),rb ## E; \
+ movq OFFSET(r11),r10; \
+ shrl $16,r2 ## E; \
+ movl r3 ## E,r9 ## E; \
xorl TAB+3072(,r7,4),r5 ## E;\
xorl TAB+2048(,r4,4),r6 ## E;\
- movzbl r1 ## L,r7 ## E; \
movzbl r1 ## H,r4 ## E; \
- movl TAB+1024(,r4,4),r4 ## E;\
- movw r3 ## X,r1 ## X; \
- roll $16,r1 ## E; \
+ movzbl r1 ## L,r7 ## E; \
shrl $16,r3 ## E; \
+ movl TAB+1024(,r4,4),r4 ## E;\
xorl TAB(,r7,4),r5 ## E; \
+ shrl $16,r1 ## E; \
movzbl r3 ## H,r7 ## E; \
movzbl r3 ## L,r3 ## E; \
xorl TAB+3072(,r7,4),r4 ## E;\
xorl TAB+2048(,r3,4),r5 ## E;\
movzbl r1 ## H,r7 ## E; \
movzbl r1 ## L,r3 ## E; \
- shrl $16,r1 ## E; \
+ xorl r10 ## E,ra ## E; \
+ movl r9 ## E,r1 ## E; \
+ movq OFFSET+8(r11),r9; \
+ shrq $32,r10; \
xorl TAB+3072(,r7,4),r6 ## E;\
movl TAB+2048(,r3,4),r3 ## E;\
movzbl r1 ## H,r7 ## E; \
@@ -118,38 +129,40 @@ FUNC: movq r1,r2; \
xorl TAB(,r1,4),r3 ## E; \
movzbl r2 ## H,r1 ## E; \
movzbl r2 ## L,r7 ## E; \
- shrl $16,r2 ## E; \
+ xorl r9 ## E, rc ## E; \
+ movl r8 ## E,r2 ## E; \
+ shrq $32,r9; \
+ xorl r10 ## E,rb ## E; \
xorl TAB+3072(,r1,4),r3 ## E;\
xorl TAB+2048(,r7,4),r4 ## E;\
movzbl r2 ## H,r1 ## E; \
+ xorl r9 ## E, rd ## E; \
movzbl r2 ## L,r2 ## E; \
- xorl OFFSET+8(r8),rc ## E; \
- xorl OFFSET+12(r8),rd ## E; \
- xorl TAB+1024(,r1,4),r3 ## E;\
- xorl TAB(,r2,4),r4 ## E;
+ xorl TAB(,r2,4),r4 ## E; \
+ xorl TAB+1024(,r1,4),r3 ## E;
#define move_regs(r1,r2,r3,r4) \
movl r3 ## E,r1 ## E; \
movl r4 ## E,r2 ## E;
#define entry(FUNC,KEY,B128,B192) \
- prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
+ prologue(FUNC,KEY,B128,B192,R2,R7,R12,R1,R3,R4,R6,R10,R5,R11,R16)
-#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
+#define return epilogue(R2,R7,R12,R5,R6,R3,R4,R11,R16)
#define encrypt_round(TAB,OFFSET) \
- round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
+ round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R8,R9,R12,R10,R5,R6,R3,R4) \
move_regs(R1,R2,R5,R6)
#define encrypt_final(TAB,OFFSET) \
- round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
+ round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R8,R9,R12,R10,R5,R6,R3,R4)
#define decrypt_round(TAB,OFFSET) \
- round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
+ round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R8,R9,R12,R10,R5,R6,R3,R4) \
move_regs(R1,R2,R5,R6)
#define decrypt_final(TAB,OFFSET) \
- round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
+ round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R8,R9,R12,R10,R5,R6,R3,R4)
/* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -19,6 +19,7 @@
struct crypto_aes_ctx {
u32 key_length;
+ u32 _pad1;
u32 key_enc[AES_MAX_KEYLENGTH_U32];
u32 key_dec[AES_MAX_KEYLENGTH_U32];
};
[-- Attachment #2: dmesg_1_core-stockn --]
[-- Type: text/plain, Size: 9946 bytes --]
e1000: eth2: e1000_watchdog: 10/100 speed: disabling TSO
testing speed of ecb(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 768 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1202 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3968 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 15065 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 119202 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 552 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1362 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4655 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17731 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 141618 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 593 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1522 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5251 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 20262 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 160605 cycles (8192 bytes)
testing speed of ecb(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 573 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1226 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3984 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 14999 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 118126 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 580 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1405 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4636 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17604 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 140289 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 619 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1551 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5297 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 20286 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 160281 cycles (8192 bytes)
testing speed of cbc(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 649 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1378 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4333 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 16113 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 126978 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 687 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1550 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 5002 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 18849 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 150723 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 722 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1713 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5670 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 21587 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 170571 cycles (8192 bytes)
testing speed of cbc(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 770 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1501 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4484 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 16368 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 128557 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 811 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1678 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 5160 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 19217 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 151977 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 848 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1843 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5840 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 21781 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 170436 cycles (8192 bytes)
testing speed of lrw(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 745 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1525 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4620 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16954 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 132816 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 790 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1696 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 5301 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 19672 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 156073 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 833 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1870 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5971 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 22368 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 176158 cycles (8192 bytes)
testing speed of lrw(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 742 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1528 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4617 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16949 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 132822 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 778 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1701 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 5291 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 19660 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 155871 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 824 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1864 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5978 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 22370 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 176247 cycles (8192 bytes)
testing speed of xts(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 770 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1498 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4486 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16456 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 128552 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 840 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1721 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 5195 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 19166 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 150278 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 921 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1917 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5916 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 21977 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 172153 cycles (8192 bytes)
testing speed of xts(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 780 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1507 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4486 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 16455 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 128540 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 853 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1718 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 5223 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 19183 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 150166 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 928 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1925 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5942 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 21950 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 172112 cycles (8192 bytes)
[-- Attachment #3: dmesg_1_core-op4n --]
[-- Type: text/plain, Size: 9946 bytes --]
e1000: eth2: e1000_watchdog: 10/100 speed: disabling TSO
testing speed of ecb(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 511 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1153 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3717 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 14003 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 110386 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 529 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1300 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4344 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 16576 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 132421 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 568 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1455 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 4969 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 18983 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 151159 cycles (8192 bytes)
testing speed of ecb(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 588 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1140 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 3650 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 13721 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 108180 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 554 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1301 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4267 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 16175 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 129410 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 592 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1445 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 4847 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 18501 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 146061 cycles (8192 bytes)
testing speed of cbc(aes) encryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 637 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1326 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4086 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 15168 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 119998 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 663 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1478 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4730 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17692 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 141461 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 702 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1628 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5321 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 20120 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 159425 cycles (8192 bytes)
testing speed of cbc(aes) decryption
test 0 (128 bit key, 16 byte blocks): 1 operation in 741 cycles (16 bytes)
test 1 (128 bit key, 64 byte blocks): 1 operation in 1422 cycles (64 bytes)
test 2 (128 bit key, 256 byte blocks): 1 operation in 4136 cycles (256 bytes)
test 3 (128 bit key, 1024 byte blocks): 1 operation in 14971 cycles (1024 bytes)
test 4 (128 bit key, 8192 byte blocks): 1 operation in 117321 cycles (8192 bytes)
test 5 (192 bit key, 16 byte blocks): 1 operation in 756 cycles (16 bytes)
test 6 (192 bit key, 64 byte blocks): 1 operation in 1551 cycles (64 bytes)
test 7 (192 bit key, 256 byte blocks): 1 operation in 4728 cycles (256 bytes)
test 8 (192 bit key, 1024 byte blocks): 1 operation in 17419 cycles (1024 bytes)
test 9 (192 bit key, 8192 byte blocks): 1 operation in 138293 cycles (8192 bytes)
test 10 (256 bit key, 16 byte blocks): 1 operation in 810 cycles (16 bytes)
test 11 (256 bit key, 64 byte blocks): 1 operation in 1690 cycles (64 bytes)
test 12 (256 bit key, 256 byte blocks): 1 operation in 5369 cycles (256 bytes)
test 13 (256 bit key, 1024 byte blocks): 1 operation in 19844 cycles (1024 bytes)
test 14 (256 bit key, 8192 byte blocks): 1 operation in 156878 cycles (8192 bytes)
testing speed of lrw(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 732 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1459 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4350 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 15880 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 124042 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 768 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1639 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 4945 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 18299 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 145070 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 812 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1779 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5580 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 20790 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 163517 cycles (8192 bytes)
testing speed of lrw(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 727 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1433 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4231 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 15406 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 120449 cycles (8192 bytes)
test 5 (320 bit key, 16 byte blocks): 1 operation in 762 cycles (16 bytes)
test 6 (320 bit key, 64 byte blocks): 1 operation in 1601 cycles (64 bytes)
test 7 (320 bit key, 256 byte blocks): 1 operation in 4823 cycles (256 bytes)
test 8 (320 bit key, 1024 byte blocks): 1 operation in 17750 cycles (1024 bytes)
test 9 (320 bit key, 8192 byte blocks): 1 operation in 140575 cycles (8192 bytes)
test 10 (384 bit key, 16 byte blocks): 1 operation in 794 cycles (16 bytes)
test 11 (384 bit key, 64 byte blocks): 1 operation in 1725 cycles (64 bytes)
test 12 (384 bit key, 256 byte blocks): 1 operation in 5419 cycles (256 bytes)
test 13 (384 bit key, 1024 byte blocks): 1 operation in 20121 cycles (1024 bytes)
test 14 (384 bit key, 8192 byte blocks): 1 operation in 158320 cycles (8192 bytes)
testing speed of xts(aes) encryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 731 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1432 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4254 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 15536 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 121465 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 797 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1626 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 4890 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 18007 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 140970 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 867 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1823 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5551 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 20474 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 160336 cycles (8192 bytes)
testing speed of xts(aes) decryption
test 0 (256 bit key, 16 byte blocks): 1 operation in 736 cycles (16 bytes)
test 1 (256 bit key, 64 byte blocks): 1 operation in 1412 cycles (64 bytes)
test 2 (256 bit key, 256 byte blocks): 1 operation in 4162 cycles (256 bytes)
test 3 (256 bit key, 1024 byte blocks): 1 operation in 15168 cycles (1024 bytes)
test 4 (256 bit key, 8192 byte blocks): 1 operation in 118542 cycles (8192 bytes)
test 5 (384 bit key, 16 byte blocks): 1 operation in 803 cycles (16 bytes)
test 6 (384 bit key, 64 byte blocks): 1 operation in 1602 cycles (64 bytes)
test 7 (384 bit key, 256 byte blocks): 1 operation in 4773 cycles (256 bytes)
test 8 (384 bit key, 1024 byte blocks): 1 operation in 17577 cycles (1024 bytes)
test 9 (384 bit key, 8192 byte blocks): 1 operation in 137579 cycles (8192 bytes)
test 10 (512 bit key, 16 byte blocks): 1 operation in 867 cycles (16 bytes)
test 11 (512 bit key, 64 byte blocks): 1 operation in 1773 cycles (64 bytes)
test 12 (512 bit key, 256 byte blocks): 1 operation in 5405 cycles (256 bytes)
test 13 (512 bit key, 1024 byte blocks): 1 operation in 19925 cycles (1024 bytes)
test 14 (512 bit key, 8192 byte blocks): 1 operation in 155815 cycles (8192 bytes)
[-- Attachment #4: percent.txt --]
[-- Type: text/plain, Size: 2570 bytes --]
ecb1_128_16 -33.46
ecb1_128_64 -4.08
ecb1_128_256 -6.33
ecb1_128_1024 -7.05
ecb1_128_8192 -7.40
ecb1_192_16 -4.17
ecb1_192_64 -4.55
ecb1_192_256 -6.68
ecb1_192_1024 -6.51
ecb1_192_8192 -6.49
ecb1_256_16 -4.22
ecb1_256_64 -4.40
ecb1_256_256 -5.37
ecb1_256_1024 -6.31
ecb1_256_8192 -5.88
ecb0_128_16 2.62
ecb0_128_64 -7.01
ecb0_128_256 -8.38
ecb0_128_1024 -8.52
ecb0_128_8192 -8.42
ecb0_192_16 -4.48
ecb0_192_64 -7.40
ecb0_192_256 -7.96
ecb0_192_1024 -8.12
ecb0_192_8192 -7.75
ecb0_256_16 -4.36
ecb0_256_64 -6.83
ecb0_256_256 -8.50
ecb0_256_1024 -8.80
ecb0_256_8192 -8.87
cbc1_128_16 -1.85
cbc1_128_64 -3.77
cbc1_128_256 -5.70
cbc1_128_1024 -5.86
cbc1_128_8192 -5.50
cbc1_192_16 -3.49
cbc1_192_64 -4.65
cbc1_192_256 -5.44
cbc1_192_1024 -6.14
cbc1_192_8192 -6.15
cbc1_256_16 -2.77
cbc1_256_64 -4.96
cbc1_256_256 -6.16
cbc1_256_1024 -6.80
cbc1_256_8192 -6.53
cbc0_128_16 -3.77
cbc0_128_64 -5.26
cbc0_128_256 -7.76
cbc0_128_1024 -8.53
cbc0_128_8192 -8.74
cbc0_192_16 -6.78
cbc0_192_64 -7.57
cbc0_192_256 -8.37
cbc0_192_1024 -9.36
cbc0_192_8192 -9.00
cbc0_256_16 -4.48
cbc0_256_64 -8.30
cbc0_256_256 -8.07
cbc0_256_1024 -8.89
cbc0_256_8192 -7.95
lrw1_256_16 -1.74
lrw1_256_64 -4.33
lrw1_256_256 -5.84
lrw1_256_1024 -6.33
lrw1_256_8192 -6.61
lrw1_320_16 -2.78
lrw1_320_64 -3.36
lrw1_320_256 -6.72
lrw1_320_1024 -6.98
lrw1_320_8192 -7.05
lrw1_384_16 -2.52
lrw1_384_64 -4.87
lrw1_384_256 -6.55
lrw1_384_1024 -7.05
lrw1_384_8192 -7.18
lrw0_256_16 -2.02
lrw0_256_64 -6.22
lrw0_256_256 -8.36
lrw0_256_1024 -9.10
lrw0_256_8192 -9.32
lrw0_320_16 -2.06
lrw0_320_64 -5.88
lrw0_320_256 -8.85
lrw0_320_1024 -9.72
lrw0_320_8192 -9.81
lrw0_384_16 -3.64
lrw0_384_64 -7.46
lrw0_384_256 -9.35
lrw0_384_1024 -10.05
lrw0_384_8192 -10.17
xts1_256_16 -5.06
xts1_256_64 -4.41
xts1_256_256 -5.17
xts1_256_1024 -5.59
xts1_256_8192 -5.51
xts1_384_16 -5.12
xts1_384_64 -5.52
xts1_384_256 -5.87
xts1_384_1024 -6.05
xts1_384_8192 -6.19
xts1_512_16 -5.86
xts1_512_64 -4.90
xts1_512_256 -6.17
xts1_512_1024 -6.84
xts1_512_8192 -6.86
xts0_256_16 -5.64
xts0_256_64 -6.30
xts0_256_256 -7.22
xts0_256_1024 -7.82
xts0_256_8192 -7.78
xts0_384_16 -5.86
xts0_384_64 -6.75
xts0_384_256 -8.62
xts0_384_1024 -8.37
xts0_384_8192 -8.38
xts0_512_16 -6.57
xts0_512_64 -7.90
xts0_512_256 -9.04
xts0_512_1024 -9.23
xts0_512_8192 -9.47
average: -6.64
min: -33.46
max: 2.62
next reply | other threads: [~2008-04-09 6:41 UTC | newest]
Thread overview: 22+ messages / expand [flat | nested] | mbox.gz | Atom feed | top
2008-04-09 6:41 Huang, Ying [this message]
2008-04-09 6:41 ` [PATCH -mm crypto] AES: x86_64 asm implementation optimization Huang, Ying
2008-04-16 7:31 ` Sebastian Siewior
2008-04-16 8:19 ` Huang, Ying
2008-04-16 8:23 ` Andi Kleen
2008-04-16 9:50 ` Herbert Xu
2008-04-16 18:40 ` Sebastian Siewior
2008-04-17 1:52 ` Huang, Ying
2008-04-17 3:34 ` Herbert Xu
2008-04-17 4:53 ` Huang, Ying
2008-04-23 22:28 ` Sebastian Siewior
2008-04-24 0:51 ` Herbert Xu
2008-04-17 3:36 ` Huang, Ying
2008-04-23 22:32 ` Sebastian Siewior
2008-04-25 3:11 ` Huang, Ying
2008-04-25 7:12 ` Sebastian Siewior
2008-04-25 7:21 ` Huang, Ying
2008-04-25 7:37 ` Sebastian Siewior
2008-04-29 22:12 ` Sebastian Siewior
2008-05-04 6:25 ` dean gaudet
2008-05-07 5:12 ` Huang, Ying
2008-05-07 5:26 ` Huang, Ying
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1207723262.18313.37.camel@caritas-dev.intel.com \
--to=ying.huang@intel.com \
--cc=adam@yggdrasil.com \
--cc=akpm@linux-foundation.org \
--cc=astor@fast.no \
--cc=herbert@gondor.apana.org.au \
--cc=linux-crypto@ml.breakpoint.cc \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.