From: Linus Torvalds <torvalds@linux-foundation.org>
To: Artur Skawina <art.08.09@gmail.com>
Cc: Git Mailing List <git@vger.kernel.org>
Subject: Re: [PATCH 0/7] block-sha1: improved SHA1 hashing
Date: Thu, 6 Aug 2009 12:41:00 -0700 (PDT) [thread overview]
Message-ID: <alpine.LFD.2.01.0908061233360.3390@localhost.localdomain> (raw)
In-Reply-To: <4A7B2A88.2040602@gmail.com>
On Thu, 6 Aug 2009, Artur Skawina wrote:
>
> Oh, i noticed that '-mtune' makes quite a difference, it can change
> the relative performance of the functions significantly, in unobvious
> ways; depending on which cpu gcc tunes for (build config or -mtune);
> some implementations slow down, others become a bit faster.
That probably is mainly true for P4, although it's quite possible that it
has an effect for just what the register allocator does, and then for
spilling.
And it looks like _all_ the tweakability is in the spilling. Nothing else
matters.
How does this patch work for you? It avoids doing that C-level register
rotation, and instead rotates the register names with the preprocessor.
I realize it's ugly as hell, but it does make it easier for gcc to see
what's going on.
The patch is against my git patches, but I think it should apply pretty
much as-is to your sha1bench sources too. Does it make any difference for
you?
Linus
---
block-sha1/sha1.c | 117 ++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 90 insertions(+), 27 deletions(-)
diff --git a/block-sha1/sha1.c b/block-sha1/sha1.c
index 78dcb0c..ac47162 100644
--- a/block-sha1/sha1.c
+++ b/block-sha1/sha1.c
@@ -101,20 +101,20 @@ void blk_SHA1_Final(unsigned char hashout[20], blk_SHA_CTX *ctx)
#define SHA_SRC(t) htonl(data[t])
#define SHA_MIX(t) SHA_ROL(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)
-#define SHA_ROUND(t, input, fn, constant) \
- TEMP = input(t); W(t) = TEMP; \
- TEMP += SHA_ROL(A,5) + (fn) + E + (constant); \
- E = D; D = C; C = SHA_ROR(B, 2); B = A; A = TEMP
+#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
+ unsigned int TEMP = input(t); W(t) = TEMP; \
+ TEMP += E + SHA_ROL(A,5) + (fn) + (constant); \
+ B = SHA_ROR(B, 2); E = TEMP; } while (0)
-#define T_0_15(t) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999 )
-#define T_16_19(t) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999 )
-#define T_20_39(t) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1 )
-#define T_40_59(t) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc )
-#define T_60_79(t) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6 )
+#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
+#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
+#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
+#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
+#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )
static void blk_SHA1Block(blk_SHA_CTX *ctx, const unsigned int *data)
{
- unsigned int A,B,C,D,E,TEMP;
+ unsigned int A,B,C,D,E;
unsigned int array[16];
A = ctx->H[0];
@@ -124,31 +124,94 @@ static void blk_SHA1Block(blk_SHA_CTX *ctx, const unsigned int *data)
E = ctx->H[4];
/* Round 1 - iterations 0-16 take their input from 'data' */
- T_0_15( 0); T_0_15( 1); T_0_15( 2); T_0_15( 3); T_0_15( 4);
- T_0_15( 5); T_0_15( 6); T_0_15( 7); T_0_15( 8); T_0_15( 9);
- T_0_15(10); T_0_15(11); T_0_15(12); T_0_15(13); T_0_15(14);
- T_0_15(15);
+ T_0_15( 0, A, B, C, D, E);
+ T_0_15( 1, E, A, B, C, D);
+ T_0_15( 2, D, E, A, B, C);
+ T_0_15( 3, C, D, E, A, B);
+ T_0_15( 4, B, C, D, E, A);
+ T_0_15( 5, A, B, C, D, E);
+ T_0_15( 6, E, A, B, C, D);
+ T_0_15( 7, D, E, A, B, C);
+ T_0_15( 8, C, D, E, A, B);
+ T_0_15( 9, B, C, D, E, A);
+ T_0_15(10, A, B, C, D, E);
+ T_0_15(11, E, A, B, C, D);
+ T_0_15(12, D, E, A, B, C);
+ T_0_15(13, C, D, E, A, B);
+ T_0_15(14, B, C, D, E, A);
+ T_0_15(15, A, B, C, D, E);
/* Round 1 - tail. Input from 512-bit mixing array */
- T_16_19(16); T_16_19(17); T_16_19(18); T_16_19(19);
+ T_16_19(16, E, A, B, C, D);
+ T_16_19(17, D, E, A, B, C);
+ T_16_19(18, C, D, E, A, B);
+ T_16_19(19, B, C, D, E, A);
/* Round 2 */
- T_20_39(20); T_20_39(21); T_20_39(22); T_20_39(23); T_20_39(24);
- T_20_39(25); T_20_39(26); T_20_39(27); T_20_39(28); T_20_39(29);
- T_20_39(30); T_20_39(31); T_20_39(32); T_20_39(33); T_20_39(34);
- T_20_39(35); T_20_39(36); T_20_39(37); T_20_39(38); T_20_39(39);
+ T_20_39(20, A, B, C, D, E);
+ T_20_39(21, E, A, B, C, D);
+ T_20_39(22, D, E, A, B, C);
+ T_20_39(23, C, D, E, A, B);
+ T_20_39(24, B, C, D, E, A);
+ T_20_39(25, A, B, C, D, E);
+ T_20_39(26, E, A, B, C, D);
+ T_20_39(27, D, E, A, B, C);
+ T_20_39(28, C, D, E, A, B);
+ T_20_39(29, B, C, D, E, A);
+ T_20_39(30, A, B, C, D, E);
+ T_20_39(31, E, A, B, C, D);
+ T_20_39(32, D, E, A, B, C);
+ T_20_39(33, C, D, E, A, B);
+ T_20_39(34, B, C, D, E, A);
+ T_20_39(35, A, B, C, D, E);
+ T_20_39(36, E, A, B, C, D);
+ T_20_39(37, D, E, A, B, C);
+ T_20_39(38, C, D, E, A, B);
+ T_20_39(39, B, C, D, E, A);
/* Round 3 */
- T_40_59(40); T_40_59(41); T_40_59(42); T_40_59(43); T_40_59(44);
- T_40_59(45); T_40_59(46); T_40_59(47); T_40_59(48); T_40_59(49);
- T_40_59(50); T_40_59(51); T_40_59(52); T_40_59(53); T_40_59(54);
- T_40_59(55); T_40_59(56); T_40_59(57); T_40_59(58); T_40_59(59);
+ T_40_59(40, A, B, C, D, E);
+ T_40_59(41, E, A, B, C, D);
+ T_40_59(42, D, E, A, B, C);
+ T_40_59(43, C, D, E, A, B);
+ T_40_59(44, B, C, D, E, A);
+ T_40_59(45, A, B, C, D, E);
+ T_40_59(46, E, A, B, C, D);
+ T_40_59(47, D, E, A, B, C);
+ T_40_59(48, C, D, E, A, B);
+ T_40_59(49, B, C, D, E, A);
+ T_40_59(50, A, B, C, D, E);
+ T_40_59(51, E, A, B, C, D);
+ T_40_59(52, D, E, A, B, C);
+ T_40_59(53, C, D, E, A, B);
+ T_40_59(54, B, C, D, E, A);
+ T_40_59(55, A, B, C, D, E);
+ T_40_59(56, E, A, B, C, D);
+ T_40_59(57, D, E, A, B, C);
+ T_40_59(58, C, D, E, A, B);
+ T_40_59(59, B, C, D, E, A);
/* Round 4 */
- T_60_79(60); T_60_79(61); T_60_79(62); T_60_79(63); T_60_79(64);
- T_60_79(65); T_60_79(66); T_60_79(67); T_60_79(68); T_60_79(69);
- T_60_79(70); T_60_79(71); T_60_79(72); T_60_79(73); T_60_79(74);
- T_60_79(75); T_60_79(76); T_60_79(77); T_60_79(78); T_60_79(79);
+ T_60_79(60, A, B, C, D, E);
+ T_60_79(61, E, A, B, C, D);
+ T_60_79(62, D, E, A, B, C);
+ T_60_79(63, C, D, E, A, B);
+ T_60_79(64, B, C, D, E, A);
+ T_60_79(65, A, B, C, D, E);
+ T_60_79(66, E, A, B, C, D);
+ T_60_79(67, D, E, A, B, C);
+ T_60_79(68, C, D, E, A, B);
+ T_60_79(69, B, C, D, E, A);
+ T_60_79(70, A, B, C, D, E);
+ T_60_79(71, E, A, B, C, D);
+ T_60_79(72, D, E, A, B, C);
+ T_60_79(73, C, D, E, A, B);
+ T_60_79(74, B, C, D, E, A);
+ T_60_79(75, A, B, C, D, E);
+ T_60_79(76, E, A, B, C, D);
+ T_60_79(77, D, E, A, B, C);
+ T_60_79(78, C, D, E, A, B);
+ T_60_79(79, B, C, D, E, A);
ctx->H[0] += A;
ctx->H[1] += B;
next prev parent reply other threads:[~2009-08-06 19:41 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-08-06 15:13 [PATCH 0/7] block-sha1: improved SHA1 hashing Linus Torvalds
2009-08-06 15:15 ` [PATCH 1/7] block-sha1: add new optimized C 'block-sha1' routines Linus Torvalds
2009-08-06 15:16 ` [PATCH 2/7] block-sha1: try to use rol/ror appropriately Linus Torvalds
2009-08-06 15:18 ` [PATCH 3/7] block-sha1: make the 'ntohl()' part of the first SHA1 loop Linus Torvalds
2009-08-06 15:20 ` [PATCH 4/7] block-sha1: re-use the temporary array as we calculate the SHA1 Linus Torvalds
2009-08-06 15:22 ` [PATCH 5/7] block-sha1: macroize the rounds a bit further Linus Torvalds
2009-08-06 15:24 ` [PATCH 6/7] block-sha1: Use '(B&C)+(D&(B^C))' instead of '(B&C)|(D&(B|C))' in round 3 Linus Torvalds
2009-08-06 15:25 ` [PATCH 7/7] block-sha1: get rid of redundant 'lenW' context Linus Torvalds
2009-08-06 18:25 ` [PATCH 2/7] block-sha1: try to use rol/ror appropriately Bert Wesarg
2009-08-06 17:22 ` [PATCH 0/7] block-sha1: improved SHA1 hashing Artur Skawina
2009-08-06 18:09 ` Linus Torvalds
2009-08-06 19:10 ` Artur Skawina
2009-08-06 19:41 ` Linus Torvalds [this message]
2009-08-06 20:08 ` Artur Skawina
2009-08-06 20:53 ` Linus Torvalds
2009-08-06 21:24 ` Linus Torvalds
2009-08-06 21:39 ` Artur Skawina
2009-08-06 21:52 ` Artur Skawina
2009-08-06 22:27 ` Linus Torvalds
2009-08-06 22:33 ` Linus Torvalds
2009-08-06 23:19 ` Artur Skawina
2009-08-06 23:42 ` Linus Torvalds
2009-08-06 22:55 ` Artur Skawina
2009-08-06 23:04 ` Linus Torvalds
2009-08-06 23:25 ` Linus Torvalds
2009-08-07 0:13 ` Linus Torvalds
2009-08-07 1:30 ` Artur Skawina
2009-08-07 1:55 ` Linus Torvalds
2009-08-07 0:53 ` Artur Skawina
2009-08-07 2:23 ` Linus Torvalds
2009-08-07 4:16 ` Artur Skawina
[not found] ` <alpine.LFD.2.01.0908071614310.3288@localhost.localdomain>
[not found] ` <4A7CBD28.6070306@gmail.com>
[not found] ` <4A7CBF47.9000903@gmail.com>
[not found] ` <alpine.LFD.2.01.0908071700290.3288@localhost.localdomain>
[not found] ` <4A7CC380.3070008@gmail.com>
2009-08-08 4:16 ` Linus Torvalds
2009-08-08 5:34 ` Artur Skawina
2009-08-08 17:10 ` Linus Torvalds
2009-08-08 18:12 ` Artur Skawina
2009-08-08 22:58 ` Artur Skawina
2009-08-08 23:36 ` Artur Skawina
-- strict thread matches above, loose matches on Subject: below --
2009-08-07 7:36 George Spelvin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=alpine.LFD.2.01.0908061233360.3390@localhost.localdomain \
--to=torvalds@linux-foundation.org \
--cc=art.08.09@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).