public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] tiny improvement to x86_64 asm aes encryption
@ 2005-11-24 10:42 Denis Vlasenko
  2005-11-24 17:23 ` Andi Kleen
  2005-11-24 18:06 ` Andreas Steinmetz
  0 siblings, 2 replies; 4+ messages in thread
From: Denis Vlasenko @ 2005-11-24 10:42 UTC (permalink / raw)
  To: Andreas Steinmetz; +Cc: Linux kernel

[-- Attachment #1: Type: text/plain, Size: 983 bytes --]

Basically, when we do:

        encrypt_round(aes_ft_tab,-96)
        encrypt_round(aes_ft_tab,-80)

the first encrypt_round produces its results in R5,R6,R3,R4,
and then moves R5->R1, R6->R2 for use in the second one:

#define encrypt_round(TAB,OFFSET) \
        round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
        move_regs(R1,R2,R5,R6)


But since we _always_ call them in pairs, we can just
swap the arguments in the second one, eliminating move_regs!


#define encrypt_round1(TAB,OFFSET) \
        round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
                         ^^^^^                    ^^^^^
#define encrypt_round2(TAB,OFFSET) \
        round(TAB,OFFSET,R5,R6,R3,R4,R1,R2,R7,R10,R1,R2,R3,R4)
                         ^^^^^                    ^^^^^
...
        encrypt_round1(aes_ft_tab,-96)
        encrypt_round2(aes_ft_tab,-80)

"encrypt_final" and "return" macros are changed accordingly.

Of course, the same thing is done on the decrypt path.

Patch is not tested.
--
vda

[-- Attachment #2: z.diff --]
[-- Type: text/x-diff, Size: 3475 bytes --]

--- aes-x86_64-asm.S.org	Mon Aug 29 02:41:01 2005
+++ aes-x86_64-asm.S	Thu Nov 24 12:34:35 2005
@@ -124,63 +124,63 @@
 	xorl	TAB+1024(,r1,4),r3 ## E;\
 	xorl	TAB(,r2,4),r4 ## E;
 
-#define move_regs(r1,r2,r3,r4) \
-	movl	r3 ## E,r1 ## E;	\
-	movl	r4 ## E,r2 ## E;
-
 #define entry(FUNC,BASE,B128,B192) \
 	prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
 
-#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
-
-#define encrypt_round(TAB,OFFSET) \
-	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
-	move_regs(R1,R2,R5,R6)
+#define return epilogue(R8,R6,R9,R7,R1,R2,R3,R4,R11)
 
-#define encrypt_final(TAB,OFFSET) \
+#define encrypt_round1(TAB,OFFSET) \
 	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
 
-#define decrypt_round(TAB,OFFSET) \
-	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
-	move_regs(R1,R2,R5,R6)
+#define encrypt_round2(TAB,OFFSET) \
+	round(TAB,OFFSET,R5,R6,R3,R4,R1,R2,R7,R10,R1,R2,R3,R4)
+
+#define encrypt_final2(TAB,OFFSET) \
+	round(TAB,OFFSET,R5,R6,R3,R4,R1,R2,R7,R10,R1,R2,R3,R4)
 
-#define decrypt_final(TAB,OFFSET) \
+#define decrypt_round1(TAB,OFFSET) \
 	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
 
+#define decrypt_round2(TAB,OFFSET) \
+	round(TAB,OFFSET,R6,R5,R4,R3,R2,R1,R7,R10,R1,R2,R3,R4)
+
+#define decrypt_final2(TAB,OFFSET) \
+	round(TAB,OFFSET,R6,R5,R4,R3,R2,R1,R7,R10,R1,R2,R3,R4)
+
 /* void aes_encrypt(void *ctx, u8 *out, const u8 *in) */
 
 	entry(aes_encrypt,0,enc128,enc192)
-	encrypt_round(aes_ft_tab,-96)
-	encrypt_round(aes_ft_tab,-80)
-enc192:	encrypt_round(aes_ft_tab,-64)
-	encrypt_round(aes_ft_tab,-48)
-enc128:	encrypt_round(aes_ft_tab,-32)
-	encrypt_round(aes_ft_tab,-16)
-	encrypt_round(aes_ft_tab,  0)
-	encrypt_round(aes_ft_tab, 16)
-	encrypt_round(aes_ft_tab, 32)
-	encrypt_round(aes_ft_tab, 48)
-	encrypt_round(aes_ft_tab, 64)
-	encrypt_round(aes_ft_tab, 80)
-	encrypt_round(aes_ft_tab, 96)
-	encrypt_final(aes_fl_tab,112)
+	encrypt_round1(aes_ft_tab,-96)
+	encrypt_round2(aes_ft_tab,-80)
+enc192:	encrypt_round1(aes_ft_tab,-64)
+	encrypt_round2(aes_ft_tab,-48)
+enc128:	encrypt_round1(aes_ft_tab,-32)
+	encrypt_round2(aes_ft_tab,-16)
+	encrypt_round1(aes_ft_tab,  0)
+	encrypt_round2(aes_ft_tab, 16)
+	encrypt_round1(aes_ft_tab, 32)
+	encrypt_round2(aes_ft_tab, 48)
+	encrypt_round1(aes_ft_tab, 64)
+	encrypt_round2(aes_ft_tab, 80)
+	encrypt_round1(aes_ft_tab, 96)
+	encrypt_final2(aes_fl_tab,112)
 	return
 
 /* void aes_decrypt(void *ctx, u8 *out, const u8 *in) */
 
 	entry(aes_decrypt,240,dec128,dec192)
-	decrypt_round(aes_it_tab,-96)
-	decrypt_round(aes_it_tab,-80)
-dec192:	decrypt_round(aes_it_tab,-64)
-	decrypt_round(aes_it_tab,-48)
-dec128:	decrypt_round(aes_it_tab,-32)
-	decrypt_round(aes_it_tab,-16)
-	decrypt_round(aes_it_tab,  0)
-	decrypt_round(aes_it_tab, 16)
-	decrypt_round(aes_it_tab, 32)
-	decrypt_round(aes_it_tab, 48)
-	decrypt_round(aes_it_tab, 64)
-	decrypt_round(aes_it_tab, 80)
-	decrypt_round(aes_it_tab, 96)
-	decrypt_final(aes_il_tab,112)
+	decrypt_round1(aes_it_tab,-96)
+	decrypt_round2(aes_it_tab,-80)
+dec192:	decrypt_round1(aes_it_tab,-64)
+	decrypt_round2(aes_it_tab,-48)
+dec128:	decrypt_round1(aes_it_tab,-32)
+	decrypt_round2(aes_it_tab,-16)
+	decrypt_round1(aes_it_tab,  0)
+	decrypt_round2(aes_it_tab, 16)
+	decrypt_round1(aes_it_tab, 32)
+	decrypt_round2(aes_it_tab, 48)
+	decrypt_round1(aes_it_tab, 64)
+	decrypt_round2(aes_it_tab, 80)
+	decrypt_round1(aes_it_tab, 96)
+	decrypt_final2(aes_il_tab,112)
 	return

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2005-11-24 18:06 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-11-24 10:42 [PATCH] tiny improvement to x86_64 asm aes encryption Denis Vlasenko
2005-11-24 17:23 ` Andi Kleen
2005-11-24 13:33   ` Denis Vlasenko
2005-11-24 18:06 ` Andreas Steinmetz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox