git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: linux@horizon.com
To: paulus@samba.org
Cc: git@vger.kernel.org, linux@horizon.com
Subject: Revised PPC assembly implementation
Date: 25 Apr 2005 03:13:37 -0000	[thread overview]
Message-ID: <20050425031337.16605.qmail@science.horizon.com> (raw)
In-Reply-To: <17003.9009.226712.220822@cargo.ozlabs.ibm.com>

Three changes:
- Added stack frame as per your description.
- Found two bugs.  (Cutting & pasting too fast.)  Fixed.
- Minor scheduling improvements.  More to come.

Which leads to three questions:
- Is the stack set properly now?
- Does it produce the right answer now?
- Is it any faster?

Thanks for your help!


/*
 * SHA-1 implementation for PowerPC.
 *
 * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
 */

/*
 * The five SHA-1 working variables A, B, C, D, E live in registers
 * 6 - 10.  Rather than moving values between registers after each
 * round, the names rotate: the register called E in round t is the
 * one called D in round t+1, and so on.  (Registers 27 - 31 hold the
 * values from before the current block.)
 *
 * RROLL(t,k) picks register 6 + ((t + k) mod 5).
 */
#define RROLL(t,k)	((((t)+(k))%5)+6)
#define RA(t)	RROLL(t,4)
#define RB(t)	RROLL(t,3)
#define RC(t)	RROLL(t,2)
#define RD(t)	RROLL(t,1)
#define RE(t)	RROLL(t,0)

/*
 * The message schedule is kept as a 16-entry window in registers
 * 11 - 26; W(t) names the register holding entry (t mod 16).
 */
#define W(t)	(11+((t)%16))

/* Register 5 is used for the constant k */

/*
 * Note that, in the previous step, RC was rotated, and RA was computed.
 * So try to postpone using them, *especially* the latter.
 */

/*
 * One SHA-1 round using f(b,c,d) = "bitwise b ? c : d"
 *                                = (b & c) + (~b & d)
 *
 * Adds (d & ~b) + W(t), then (c & b) + k, then rotl(a,5) into RE(t),
 * and rotates RB(t) left by 30.  %r0 is scratch, %r5 holds k.
 *
 * Every line of the body except the last must end in "; \" so that
 * the whole sequence stays inside the macro definition.
 */
#define STEPD0(t)				\
	andc	%r0,RD(t),RB(t);		\
	add	%r0,%r0,W(t);			\
	add	RE(t),RE(t),%r0;		\
	and	%r0,RC(t),RB(t);		\
	add	%r0,%r0,%r5;			\
	add	RE(t),RE(t),%r0;		\
	rotlwi	%r0,RA(t),5;			\
	rotlwi	RB(t),RB(t),30;			\
	add	RE(t),RE(t),%r0;

/*
 * One SHA-1 round using f(b,c,d) = b ^ c ^ d
 *
 * Adds (d ^ b ^ c) + W(t), then rotl(a,5) + k into RE(t), and
 * rotates RB(t) left by 30.  %r0 is scratch, %r5 holds k.
 *
 * Every line of the body except the last must end in "; \" so that
 * the whole sequence stays inside the macro definition.
 */
#define STEPD1(t)				\
	xor	%r0,RD(t),RB(t);		\
	xor	%r0,%r0,RC(t);			\
	add	%r0,%r0,W(t);			\
	add	RE(t),RE(t),%r0;		\
	rotlwi	%r0,RA(t),5;			\
	add	%r0,%r0,%r5;			\
	rotlwi	RB(t),RB(t),30;			\
	add	RE(t),RE(t),%r0;

/*
 * One SHA-1 round using
 * f(b,c,d) = majority(b,c,d) = (b & d) + (c & (b ^ d))
 *
 * Adds (d & b) + W(t), then ((d ^ b) & c), then rotl(a,5) + k into
 * RE(t), and rotates RB(t) left by 30.  %r0 is scratch, %r5 holds k.
 *
 * Every line of the body except the last must end in "; \" so that
 * the whole sequence stays inside the macro definition.
 */
#define STEPD2(t)				\
	and	%r0,RD(t),RB(t);		\
	add	%r0,%r0,W(t);			\
	add	RE(t),RE(t),%r0;		\
	xor	%r0,RD(t),RB(t);		\
	and	%r0,%r0,RC(t);			\
	add	RE(t),RE(t),%r0;		\
	rotlwi	%r0,RA(t),5;			\
	add	%r0,%r0,%r5;			\
	rotlwi	RB(t),RB(t),30;			\
	add	RE(t),RE(t),%r0;

/*
 * Load 32-bit word number t of the current input block (byte offset
 * t*4 from the address in %r4) into the schedule register W(t).
 */
#define LOADW(t)				\
	lwz	W(t),(t)*4(%r4)

/*
 * Compute schedule entry t in place:
 *   W(t) = rotl(W(t-3) ^ W(t-8) ^ W(t-14) ^ W(t-16), 1)
 * W(t) and W(t-16) name the same register, so the oldest entry of
 * the 16-word window is overwritten.  %r0 is scratch.
 */
#define UPDATEW(t)				\
	xor	%r0,W((t)-8),W((t)-3);		\
	xor	W(t),W((t)-14),W((t)-16);	\
	xor	W(t),%r0,W(t);			\
	rotlwi	W(t),W(t),1

/*
 * Rounds t .. t+3 with STEPD0, each followed by the load of the
 * input word needed four rounds later (words t+4 .. t+7).
 */
#define STEP0LD4(t)				\
	STEPD0(t);				\
	LOADW((t)+4);				\
	STEPD0((t)+1);				\
	LOADW((t)+5);				\
	STEPD0((t)+2);				\
	LOADW((t)+6);				\
	STEPD0((t)+3);				\
	LOADW((t)+7)

/*
 * Rounds t .. t+3 with the round macro STEP<fn> (fn is pasted onto
 * "STEP", e.g. D0, D1, D2), each followed by the computation of the
 * schedule entry needed four rounds later (entries t+4 .. t+7).
 */
#define STEPUP4(t, fn)				\
	STEP##fn(t);				\
	UPDATEW((t)+4);				\
	STEP##fn((t)+1);			\
	UPDATEW((t)+5);				\
	STEP##fn((t)+2);			\
	UPDATEW((t)+6);				\
	STEP##fn((t)+3);			\
	UPDATEW((t)+7)

/*
 * Twenty consecutive rounds t .. t+19 with round macro STEP<fn>,
 * built from five STEPUP4 groups; schedule entries t+4 .. t+23 are
 * computed along the way.
 */
#define STEPUP20(t, fn)				\
	STEPUP4((t)+0, fn);			\
	STEPUP4((t)+4, fn);			\
	STEPUP4((t)+8, fn);			\
	STEPUP4((t)+12, fn);			\
	STEPUP4((t)+16, fn)

	.globl	sha1_core
sha1_core:
	stwu	%r1,-80(%r1)
	stmw	%r13,4(%r1)

	/* Load up A - E */
	lmw	%r27,0(%r3)

	mtctr	%r5

1:	mr	RA(0),%r27
	LOADW(0)
	mr	RB(0),%r28
	LOADW(1)
	mr	RC(0),%r29
	LOADW(2)
	mr	RD(0),%r30
	LOADW(3)
	mr	RE(0),%r31

	lis	%r5,0x5a82	/* K0-19 */
	ori	%r5,%r5,0x7999
	STEP0LD4(0)
	STEP0LD4(4)
	STEP0LD4(8)
	STEPUP4(12, D0)
	STEPUP4(16, D0)

	lis	%r5,0x6ed9	/* K20-39 */
	ori	%r5,%r5,0xeba1
	STEPUP20(20, D1)

	lis	%r5,0x8f1b	/* K40-59 */
	ori	%r5,%r5,0xbcdc
	STEPUP20(40, D2)

	lis	%r5,0xca62	/* K60-79 */
	ori	%r5,%r5,0xc1d6
	STEPUP4(60, D1)
	STEPUP4(64, D1)
	STEPUP4(68, D1)
	STEPUP4(72, D1)
	STEPD1(76)
	STEPD1(77)
	STEPD1(78)
	STEPD1(79)

	/* Add results to original values */
	add	%r27,%r27,RA(0)
	add	%r28,%r28,RB(0)
	add	%r29,%r29,RC(0)
	add	%r30,%r30,RD(0)
	add	%r31,%r31,RE(0)

	addi	%r4,%r4,64
	bdnz	1b

	/* Save final hash, restore registers, and return */
	stmw	%r27,0(%r3)
	lmw	%r13,4(%r1)
	addi	%r1,%r1,80
	blr

  parent reply	other threads:[~2005-04-25  3:11 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-04-23 12:42 [PATCH] PPC assembly implementation of SHA1 linux
2005-04-23 13:03 ` linux
2005-04-24  2:49 ` Benjamin Herrenschmidt
2005-04-24  4:40 ` Paul Mackerras
2005-04-24 12:04   ` Wayne Scott
2005-04-25  0:16   ` linux
2005-04-25  3:13   ` linux [this message]
2005-04-25  9:40     ` Revised PPC assembly implementation Paul Mackerras
2005-04-25 17:34       ` linux
2005-04-25 23:00         ` Paul Mackerras
2005-04-25 23:17           ` David S. Miller
2005-04-26  1:22             ` Paul Mackerras
2005-04-27  1:47               ` linux
2005-04-27  3:39                 ` Paul Mackerras
2005-04-27 16:01                   ` linux
2005-04-26  2:14             ` linux
2005-04-26  2:35             ` linux

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050425031337.16605.qmail@science.horizon.com \
    --to=linux@horizon.com \
    --cc=git@vger.kernel.org \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).