From: Denis Vlasenko <vda@ilport.com.ua>
To: Andreas Steinmetz <ast@domdv.de>
Cc: Linux kernel <linux-kernel@vger.kernel.org>
Subject: [PATCH] tiny improvement to x86_64 asm aes encryption
Date: Thu, 24 Nov 2005 12:42:35 +0200 [thread overview]
Message-ID: <200511241242.35294.vda@ilport.com.ua> (raw)
[-- Attachment #1: Type: text/plain, Size: 983 bytes --]
Basically, when we do:
encrypt_round(aes_ft_tab,-96)
encrypt_round(aes_ft_tab,-80)
the first encrypt_round produces its results in R5,R6,R3,R4,
and then moves R5->R1, R6->R2 for use in the second one:
#define encrypt_round(TAB,OFFSET) \
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
move_regs(R1,R2,R5,R6)
But since we _always_ call them in pairs, we can just
swap arguments in second one, eliminating move_regs!
#define encrypt_round1(TAB,OFFSET) \
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
^^^^^ ^^^^^
#define encrypt_round2(TAB,OFFSET) \
round(TAB,OFFSET,R5,R6,R3,R4,R1,R2,R7,R10,R1,R2,R3,R4)
^^^^^ ^^^^^
...
encrypt_round1(aes_ft_tab,-96)
encrypt_round2(aes_ft_tab,-80)
The "encrypt_final" and "return" macros are changed accordingly.
Of course, the same thing is done on the decrypt path.
Patch is not tested.
--
vda
[-- Attachment #2: z.diff --]
[-- Type: text/x-diff, Size: 3475 bytes --]
--- aes-x86_64-asm.S.org Mon Aug 29 02:41:01 2005
+++ aes-x86_64-asm.S Thu Nov 24 12:34:35 2005
@@ -124,63 +124,63 @@
xorl TAB+1024(,r1,4),r3 ## E;\
xorl TAB(,r2,4),r4 ## E;
-#define move_regs(r1,r2,r3,r4) \
- movl r3 ## E,r1 ## E; \
- movl r4 ## E,r2 ## E;
-
#define entry(FUNC,BASE,B128,B192) \
prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
-#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
-
-#define encrypt_round(TAB,OFFSET) \
- round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
- move_regs(R1,R2,R5,R6)
+#define return epilogue(R8,R6,R9,R7,R1,R2,R3,R4,R11)
-#define encrypt_final(TAB,OFFSET) \
+#define encrypt_round1(TAB,OFFSET) \
round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
-#define decrypt_round(TAB,OFFSET) \
- round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
- move_regs(R1,R2,R5,R6)
+#define encrypt_round2(TAB,OFFSET) \
+ round(TAB,OFFSET,R5,R6,R3,R4,R1,R2,R7,R10,R1,R2,R3,R4)
+
+#define encrypt_final2(TAB,OFFSET) \
+ round(TAB,OFFSET,R5,R6,R3,R4,R1,R2,R7,R10,R1,R2,R3,R4)
-#define decrypt_final(TAB,OFFSET) \
+#define decrypt_round1(TAB,OFFSET) \
round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
+#define decrypt_round2(TAB,OFFSET) \
+ round(TAB,OFFSET,R6,R5,R4,R3,R2,R1,R7,R10,R1,R2,R3,R4)
+
+#define decrypt_final2(TAB,OFFSET) \
+ round(TAB,OFFSET,R6,R5,R4,R3,R2,R1,R7,R10,R1,R2,R3,R4)
+
/* void aes_encrypt(void *ctx, u8 *out, const u8 *in) */
entry(aes_encrypt,0,enc128,enc192)
- encrypt_round(aes_ft_tab,-96)
- encrypt_round(aes_ft_tab,-80)
-enc192: encrypt_round(aes_ft_tab,-64)
- encrypt_round(aes_ft_tab,-48)
-enc128: encrypt_round(aes_ft_tab,-32)
- encrypt_round(aes_ft_tab,-16)
- encrypt_round(aes_ft_tab, 0)
- encrypt_round(aes_ft_tab, 16)
- encrypt_round(aes_ft_tab, 32)
- encrypt_round(aes_ft_tab, 48)
- encrypt_round(aes_ft_tab, 64)
- encrypt_round(aes_ft_tab, 80)
- encrypt_round(aes_ft_tab, 96)
- encrypt_final(aes_fl_tab,112)
+ encrypt_round1(aes_ft_tab,-96)
+ encrypt_round2(aes_ft_tab,-80)
+enc192: encrypt_round1(aes_ft_tab,-64)
+ encrypt_round2(aes_ft_tab,-48)
+enc128: encrypt_round1(aes_ft_tab,-32)
+ encrypt_round2(aes_ft_tab,-16)
+ encrypt_round1(aes_ft_tab, 0)
+ encrypt_round2(aes_ft_tab, 16)
+ encrypt_round1(aes_ft_tab, 32)
+ encrypt_round2(aes_ft_tab, 48)
+ encrypt_round1(aes_ft_tab, 64)
+ encrypt_round2(aes_ft_tab, 80)
+ encrypt_round1(aes_ft_tab, 96)
+ encrypt_final2(aes_fl_tab,112)
return
/* void aes_decrypt(void *ctx, u8 *out, const u8 *in) */
entry(aes_decrypt,240,dec128,dec192)
- decrypt_round(aes_it_tab,-96)
- decrypt_round(aes_it_tab,-80)
-dec192: decrypt_round(aes_it_tab,-64)
- decrypt_round(aes_it_tab,-48)
-dec128: decrypt_round(aes_it_tab,-32)
- decrypt_round(aes_it_tab,-16)
- decrypt_round(aes_it_tab, 0)
- decrypt_round(aes_it_tab, 16)
- decrypt_round(aes_it_tab, 32)
- decrypt_round(aes_it_tab, 48)
- decrypt_round(aes_it_tab, 64)
- decrypt_round(aes_it_tab, 80)
- decrypt_round(aes_it_tab, 96)
- decrypt_final(aes_il_tab,112)
+ decrypt_round1(aes_it_tab,-96)
+ decrypt_round2(aes_it_tab,-80)
+dec192: decrypt_round1(aes_it_tab,-64)
+ decrypt_round2(aes_it_tab,-48)
+dec128: decrypt_round1(aes_it_tab,-32)
+ decrypt_round2(aes_it_tab,-16)
+ decrypt_round1(aes_it_tab, 0)
+ decrypt_round2(aes_it_tab, 16)
+ decrypt_round1(aes_it_tab, 32)
+ decrypt_round2(aes_it_tab, 48)
+ decrypt_round1(aes_it_tab, 64)
+ decrypt_round2(aes_it_tab, 80)
+ decrypt_round1(aes_it_tab, 96)
+ decrypt_final2(aes_il_tab,112)
return
next reply other threads:[~2005-11-24 10:43 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-11-24 10:42 Denis Vlasenko [this message]
2005-11-24 17:23 ` [PATCH] tiny improvement to x86_64 asm aes encryption Andi Kleen
2005-11-24 13:33 ` Denis Vlasenko
2005-11-24 18:06 ` Andreas Steinmetz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200511241242.35294.vda@ilport.com.ua \
--to=vda@ilport.com.ua \
--cc=ast@domdv.de \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox