linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Denys Vlasenko <vda.linux@googlemail.com>
To: Herbert Xu <herbert@gondor.apana.org.au>
Cc: linux-crypto@vger.kernel.org
Subject: [PATCH 4/5] camellia: de-unrolling
Date: Thu, 25 Oct 2007 12:47:16 +0100	[thread overview]
Message-ID: <200710251247.16843.vda.linux@googlemail.com> (raw)
In-Reply-To: <200710251243.58701.vda.linux@googlemail.com>

[-- Attachment #1: Type: text/plain, Size: 494 bytes --]

On Thursday 25 October 2007 12:43, Denys Vlasenko wrote:
> Hi Hervert,
> 
> Please review and maybe propagate upstream following patches.
> 
> camellia4.diff
>     Move huge unrolled pieces of code (3 screenfuls) at the end of
>     128/256 key setup routines into common camellia_setup_tail(),
>     convert it to loop there.
>     Loop is still unrolled six times, so performance hit is very small,
>     code size win is big.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
--
vda

[-- Attachment #2: camellia4.diff --]
[-- Type: text/x-diff, Size: 6849 bytes --]

--- linux-2.6.23.src/crypto/camellia3.c	2007-10-24 19:03:27.000000000 +0100
+++ linux-2.6.23.src/crypto/camellia.c	2007-10-24 19:03:57.000000000 +0100
@@ -424,6 +424,27 @@ static const u32 camellia_sp4404[256] = 
 #define SUBKEY_L(INDEX) (subkey[(INDEX)*2])
 #define SUBKEY_R(INDEX) (subkey[(INDEX)*2 + 1])
 
+static void camellia_setup_tail(u32 *subkey, int max)
+{
+	u32 dw;
+	int i = 2;
+	do {
+		dw = SUBKEY_L(i + 0) ^ SUBKEY_R(i + 0); dw = ROL8(dw);/* round 1 */
+		SUBKEY_R(i + 0) = SUBKEY_L(i + 0) ^ dw; SUBKEY_L(i + 0) = dw;
+		dw = SUBKEY_L(i + 1) ^ SUBKEY_R(i + 1); dw = ROL8(dw);/* round 2 */
+		SUBKEY_R(i + 1) = SUBKEY_L(i + 1) ^ dw; SUBKEY_L(i + 1) = dw;
+		dw = SUBKEY_L(i + 2) ^ SUBKEY_R(i + 2); dw = ROL8(dw);/* round 3 */
+		SUBKEY_R(i + 2) = SUBKEY_L(i + 2) ^ dw; SUBKEY_L(i + 2) = dw;
+		dw = SUBKEY_L(i + 3) ^ SUBKEY_R(i + 3); dw = ROL8(dw);/* round 4 */
+		SUBKEY_R(i + 3) = SUBKEY_L(i + 3) ^ dw; SUBKEY_L(i + 3) = dw;
+		dw = SUBKEY_L(i + 4) ^ SUBKEY_R(i + 4); dw = ROL8(dw);/* round 5 */
+		SUBKEY_R(i + 4) = SUBKEY_L(i + 4) ^ dw; SUBKEY_L(i + 4) = dw;
+		dw = SUBKEY_L(i + 5) ^ SUBKEY_R(i + 5); dw = ROL8(dw);/* round 6 */
+		SUBKEY_R(i + 5) = SUBKEY_L(i + 5) ^ dw; SUBKEY_L(i + 5) = dw;
+		i += 8;
+	} while (i < max);
+}
+
 static void camellia_setup128(const unsigned char *key, u32 *subkey)
 {
 	u32 kll, klr, krl, krr;
@@ -650,42 +671,7 @@ static void camellia_setup128(const unsi
 	SUBKEY_R(24) = subR[24] ^ subR[23];
 
 	/* apply the inverse of the last half of P-function */
-	dw = SUBKEY_L(2) ^ SUBKEY_R(2); dw = ROL8(dw);/* round 1 */
-	SUBKEY_R(2) = SUBKEY_L(2) ^ dw; SUBKEY_L(2) = dw;
-	dw = SUBKEY_L(3) ^ SUBKEY_R(3); dw = ROL8(dw);/* round 2 */
-	SUBKEY_R(3) = SUBKEY_L(3) ^ dw; SUBKEY_L(3) = dw;
-	dw = SUBKEY_L(4) ^ SUBKEY_R(4); dw = ROL8(dw);/* round 3 */
-	SUBKEY_R(4) = SUBKEY_L(4) ^ dw; SUBKEY_L(4) = dw;
-	dw = SUBKEY_L(5) ^ SUBKEY_R(5); dw = ROL8(dw);/* round 4 */
-	SUBKEY_R(5) = SUBKEY_L(5) ^ dw; SUBKEY_L(5) = dw;
-	dw = SUBKEY_L(6) ^ SUBKEY_R(6); dw = ROL8(dw);/* round 5 */
-	SUBKEY_R(6) = SUBKEY_L(6) ^ dw; SUBKEY_L(6) = dw;
-	dw = SUBKEY_L(7) ^ SUBKEY_R(7); dw = ROL8(dw);/* round 6 */
-	SUBKEY_R(7) = SUBKEY_L(7) ^ dw; SUBKEY_L(7) = dw;
-	dw = SUBKEY_L(10) ^ SUBKEY_R(10); dw = ROL8(dw);/* round 7 */
-	SUBKEY_R(10) = SUBKEY_L(10) ^ dw; SUBKEY_L(10) = dw;
-	dw = SUBKEY_L(11) ^ SUBKEY_R(11); dw = ROL8(dw);/* round 8 */
-	SUBKEY_R(11) = SUBKEY_L(11) ^ dw; SUBKEY_L(11) = dw;
-	dw = SUBKEY_L(12) ^ SUBKEY_R(12); dw = ROL8(dw);/* round 9 */
-	SUBKEY_R(12) = SUBKEY_L(12) ^ dw; SUBKEY_L(12) = dw;
-	dw = SUBKEY_L(13) ^ SUBKEY_R(13); dw = ROL8(dw);/* round 10 */
-	SUBKEY_R(13) = SUBKEY_L(13) ^ dw; SUBKEY_L(13) = dw;
-	dw = SUBKEY_L(14) ^ SUBKEY_R(14); dw = ROL8(dw);/* round 11 */
-	SUBKEY_R(14) = SUBKEY_L(14) ^ dw; SUBKEY_L(14) = dw;
-	dw = SUBKEY_L(15) ^ SUBKEY_R(15); dw = ROL8(dw);/* round 12 */
-	SUBKEY_R(15) = SUBKEY_L(15) ^ dw; SUBKEY_L(15) = dw;
-	dw = SUBKEY_L(18) ^ SUBKEY_R(18); dw = ROL8(dw);/* round 13 */
-	SUBKEY_R(18) = SUBKEY_L(18) ^ dw; SUBKEY_L(18) = dw;
-	dw = SUBKEY_L(19) ^ SUBKEY_R(19); dw = ROL8(dw);/* round 14 */
-	SUBKEY_R(19) = SUBKEY_L(19) ^ dw; SUBKEY_L(19) = dw;
-	dw = SUBKEY_L(20) ^ SUBKEY_R(20); dw = ROL8(dw);/* round 15 */
-	SUBKEY_R(20) = SUBKEY_L(20) ^ dw; SUBKEY_L(20) = dw;
-	dw = SUBKEY_L(21) ^ SUBKEY_R(21); dw = ROL8(dw);/* round 16 */
-	SUBKEY_R(21) = SUBKEY_L(21) ^ dw; SUBKEY_L(21) = dw;
-	dw = SUBKEY_L(22) ^ SUBKEY_R(22); dw = ROL8(dw);/* round 17 */
-	SUBKEY_R(22) = SUBKEY_L(22) ^ dw; SUBKEY_L(22) = dw;
-	dw = SUBKEY_L(23) ^ SUBKEY_R(23); dw = ROL8(dw);/* round 18 */
-	SUBKEY_R(23) = SUBKEY_L(23) ^ dw; SUBKEY_L(23) = dw;
+	camellia_setup_tail(subkey, 24);
 }
 
 static void camellia_setup256(const unsigned char *key, u32 *subkey)
@@ -995,54 +981,7 @@ static void camellia_setup256(const unsi
 	SUBKEY_R(32) = subR[32] ^ subR[31];
 
 	/* apply the inverse of the last half of P-function */
-	dw = SUBKEY_L(2) ^ SUBKEY_R(2); dw = ROL8(dw);/* round 1 */
-	SUBKEY_R(2) = SUBKEY_L(2) ^ dw; SUBKEY_L(2) = dw;
-	dw = SUBKEY_L(3) ^ SUBKEY_R(3); dw = ROL8(dw);/* round 2 */
-	SUBKEY_R(3) = SUBKEY_L(3) ^ dw; SUBKEY_L(3) = dw;
-	dw = SUBKEY_L(4) ^ SUBKEY_R(4); dw = ROL8(dw);/* round 3 */
-	SUBKEY_R(4) = SUBKEY_L(4) ^ dw; SUBKEY_L(4) = dw;
-	dw = SUBKEY_L(5) ^ SUBKEY_R(5); dw = ROL8(dw);/* round 4 */
-	SUBKEY_R(5) = SUBKEY_L(5) ^ dw; SUBKEY_L(5) = dw;
-	dw = SUBKEY_L(6) ^ SUBKEY_R(6); dw = ROL8(dw);/* round 5 */
-	SUBKEY_R(6) = SUBKEY_L(6) ^ dw; SUBKEY_L(6) = dw;
-	dw = SUBKEY_L(7) ^ SUBKEY_R(7); dw = ROL8(dw);/* round 6 */
-	SUBKEY_R(7) = SUBKEY_L(7) ^ dw; SUBKEY_L(7) = dw;
-	dw = SUBKEY_L(10) ^ SUBKEY_R(10); dw = ROL8(dw);/* round 7 */
-	SUBKEY_R(10) = SUBKEY_L(10) ^ dw; SUBKEY_L(10) = dw;
-	dw = SUBKEY_L(11) ^ SUBKEY_R(11); dw = ROL8(dw);/* round 8 */
-	SUBKEY_R(11) = SUBKEY_L(11) ^ dw; SUBKEY_L(11) = dw;
-	dw = SUBKEY_L(12) ^ SUBKEY_R(12); dw = ROL8(dw);/* round 9 */
-	SUBKEY_R(12) = SUBKEY_L(12) ^ dw; SUBKEY_L(12) = dw;
-	dw = SUBKEY_L(13) ^ SUBKEY_R(13); dw = ROL8(dw);/* round 10 */
-	SUBKEY_R(13) = SUBKEY_L(13) ^ dw; SUBKEY_L(13) = dw;
-	dw = SUBKEY_L(14) ^ SUBKEY_R(14); dw = ROL8(dw);/* round 11 */
-	SUBKEY_R(14) = SUBKEY_L(14) ^ dw; SUBKEY_L(14) = dw;
-	dw = SUBKEY_L(15) ^ SUBKEY_R(15); dw = ROL8(dw);/* round 12 */
-	SUBKEY_R(15) = SUBKEY_L(15) ^ dw; SUBKEY_L(15) = dw;
-	dw = SUBKEY_L(18) ^ SUBKEY_R(18); dw = ROL8(dw);/* round 13 */
-	SUBKEY_R(18) = SUBKEY_L(18) ^ dw; SUBKEY_L(18) = dw;
-	dw = SUBKEY_L(19) ^ SUBKEY_R(19); dw = ROL8(dw);/* round 14 */
-	SUBKEY_R(19) = SUBKEY_L(19) ^ dw; SUBKEY_L(19) = dw;
-	dw = SUBKEY_L(20) ^ SUBKEY_R(20); dw = ROL8(dw);/* round 15 */
-	SUBKEY_R(20) = SUBKEY_L(20) ^ dw; SUBKEY_L(20) = dw;
-	dw = SUBKEY_L(21) ^ SUBKEY_R(21); dw = ROL8(dw);/* round 16 */
-	SUBKEY_R(21) = SUBKEY_L(21) ^ dw; SUBKEY_L(21) = dw;
-	dw = SUBKEY_L(22) ^ SUBKEY_R(22); dw = ROL8(dw);/* round 17 */
-	SUBKEY_R(22) = SUBKEY_L(22) ^ dw; SUBKEY_L(22) = dw;
-	dw = SUBKEY_L(23) ^ SUBKEY_R(23); dw = ROL8(dw);/* round 18 */
-	SUBKEY_R(23) = SUBKEY_L(23) ^ dw; SUBKEY_L(23) = dw;
-	dw = SUBKEY_L(26) ^ SUBKEY_R(26); dw = ROL8(dw);/* round 19 */
-	SUBKEY_R(26) = SUBKEY_L(26) ^ dw; SUBKEY_L(26) = dw;
-	dw = SUBKEY_L(27) ^ SUBKEY_R(27); dw = ROL8(dw);/* round 20 */
-	SUBKEY_R(27) = SUBKEY_L(27) ^ dw; SUBKEY_L(27) = dw;
-	dw = SUBKEY_L(28) ^ SUBKEY_R(28); dw = ROL8(dw);/* round 21 */
-	SUBKEY_R(28) = SUBKEY_L(28) ^ dw; SUBKEY_L(28) = dw;
-	dw = SUBKEY_L(29) ^ SUBKEY_R(29); dw = ROL8(dw);/* round 22 */
-	SUBKEY_R(29) = SUBKEY_L(29) ^ dw; SUBKEY_L(29) = dw;
-	dw = SUBKEY_L(30) ^ SUBKEY_R(30); dw = ROL8(dw);/* round 23 */
-	SUBKEY_R(30) = SUBKEY_L(30) ^ dw; SUBKEY_L(30) = dw;
-	dw = SUBKEY_L(31) ^ SUBKEY_R(31); dw = ROL8(dw);/* round 24 */
-	SUBKEY_R(31) = SUBKEY_L(31) ^ dw; SUBKEY_L(31) = dw;
+	camellia_setup_tail(subkey, 32);
 }
 
 static void camellia_setup192(const unsigned char *key, u32 *subkey)

  parent reply	other threads:[~2007-10-25 11:47 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-10-25 11:43 [PATCH0/5] camellia: cleanup, de-unrolling, and 64bit-ization Denys Vlasenko
2007-10-25 11:45 ` [PATCH 1/5] camellia: cleanup Denys Vlasenko
2007-10-26  8:43   ` Noriaki TAKAMIYA
2007-11-06 14:17   ` Herbert Xu
2007-10-25 11:45 ` [PATCH 2/5] " Denys Vlasenko
2007-10-26  8:44   ` Noriaki TAKAMIYA
2007-11-06 14:19   ` Herbert Xu
2007-10-25 11:46 ` [PATCH 3/5] " Denys Vlasenko
2007-10-26  8:44   ` Noriaki TAKAMIYA
2007-11-06 14:21   ` Herbert Xu
2007-10-25 11:47 ` Denys Vlasenko [this message]
2007-10-26  8:45   ` [PATCH 4/5] camellia: de-unrolling Noriaki TAKAMIYA
2007-11-06 14:21   ` Herbert Xu
2007-10-25 11:48 ` [PATCH 5/5] camellia: de-unrolling, 64bit-ization Denys Vlasenko
2007-10-26  8:45   ` Noriaki TAKAMIYA
2007-11-06 14:23   ` Herbert Xu
2007-11-07 13:22     ` Denys Vlasenko
2007-11-08 13:30       ` Herbert Xu
2007-11-13  6:07         ` Noriaki TAKAMIYA
2007-11-13  6:25           ` [camellia-oss:00952] " Noriaki TAKAMIYA
2007-11-13 22:34             ` Denys Vlasenko
2007-11-14  1:41               ` David Miller
2007-11-14  2:47                 ` Denys Vlasenko
2007-11-14  3:49                   ` David Miller
2007-11-14  5:30                     ` Denys Vlasenko
2007-11-14  6:10                       ` David Miller
2007-11-14  7:38                         ` Denys Vlasenko
2007-11-14  7:15                       ` Denys Vlasenko
2007-11-14 14:14                         ` Herbert Xu
2007-11-14 21:28                           ` Denys Vlasenko
2007-11-18 13:21                             ` Herbert Xu
2007-11-19  4:30                               ` Denys Vlasenko
2007-11-19 18:49                                 ` Noriaki TAKAMIYA
2007-11-21  2:44                                   ` Denys Vlasenko
2007-11-21  3:53                                 ` Herbert Xu
2007-11-21  8:08                                   ` Denys Vlasenko
2007-11-21  8:12                                     ` Herbert Xu
2007-11-21  8:38                                       ` Denys Vlasenko
2007-11-14  4:18                   ` Noriaki TAKAMIYA
2007-10-25 11:57 ` [PATCH0/5] camellia: cleanup, de-unrolling, and 64bit-ization Denys Vlasenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200710251247.16843.vda.linux@googlemail.com \
    --to=vda.linux@googlemail.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=linux-crypto@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).