linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] crypto: x86/aes - Don't use %rbp as temporary register
@ 2017-05-17  4:03 Eric Biggers
  2017-05-17 20:44 ` Josh Poimboeuf
  2017-05-23  5:01 ` Herbert Xu
  0 siblings, 2 replies; 6+ messages in thread
From: Eric Biggers @ 2017-05-17  4:03 UTC (permalink / raw)
  To: linux-crypto
  Cc: Herbert Xu, David S . Miller, x86, linux-kernel, Josh Poimboeuf,
	Eric Biggers

From: Eric Biggers <ebiggers@google.com>

When using the "aes-asm" implementation of AES (*not* the AES-NI
implementation) on an x86_64, v4.12-rc1 kernel with lockdep enabled, the
following warning was reported, along with a long unwinder dump:

	WARNING: kernel stack regs at ffffc90000643558 in kworker/u4:2:155 has bad 'bp' value 000000000000001c

The problem is that aes_enc_block() and aes_dec_block() use %rbp as a
temporary register, which breaks stack traces if an interrupt occurs.

Fix this by replacing %rbp with %r9, which was being used to hold the
saved value of %rbp.  This required rearranging the AES round macro
slightly since %r9d cannot be used as the target of a move from %ah-%dh.

Performance is essentially unchanged --- actually about 0.2% faster than
before.  Interestingly, I also measured aes-generic as being nearly 7%
faster than aes-asm, so perhaps aes-asm has outlived its usefulness...

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/aes-x86_64-asm_64.S | 47 +++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index 910565547163..8739cf7795de 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -42,17 +42,15 @@
 #define R5E	%esi
 #define R6	%rdi
 #define R6E	%edi
-#define R7	%rbp
-#define R7E	%ebp
+#define R7	%r9	/* don't use %rbp; it breaks stack traces */
+#define R7E	%r9d
 #define R8	%r8
-#define R9	%r9
 #define R10	%r10
 #define R11	%r11
 
-#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
+#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
 	ENTRY(FUNC);			\
 	movq	r1,r2;			\
-	movq	r3,r4;			\
 	leaq	KEY+48(r8),r9;		\
 	movq	r10,r11;		\
 	movl	(r7),r5 ## E;		\
@@ -70,9 +68,8 @@
 	je	B192;			\
 	leaq	32(r9),r9;
 
-#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
+#define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \
 	movq	r1,r2;			\
-	movq	r3,r4;			\
 	movl	r5 ## E,(r9);		\
 	movl	r6 ## E,4(r9);		\
 	movl	r7 ## E,8(r9);		\
@@ -88,12 +85,12 @@
 	movl	TAB(,r6,4),r6 ## E;	\
 	roll	$16,r2 ## E;		\
 	shrl	$16,r4 ## E;		\
-	movzbl	r4 ## H,r7 ## E;	\
-	movzbl	r4 ## L,r4 ## E;	\
+	movzbl	r4 ## L,r7 ## E;	\
+	movzbl	r4 ## H,r4 ## E;	\
 	xorl	OFFSET(r8),ra ## E;	\
 	xorl	OFFSET+4(r8),rb ## E;	\
-	xorl	TAB+3072(,r7,4),r5 ## E;\
-	xorl	TAB+2048(,r4,4),r6 ## E;\
+	xorl	TAB+3072(,r4,4),r5 ## E;\
+	xorl	TAB+2048(,r7,4),r6 ## E;\
 	movzbl	r1 ## L,r7 ## E;	\
 	movzbl	r1 ## H,r4 ## E;	\
 	movl	TAB+1024(,r4,4),r4 ## E;\
@@ -101,19 +98,19 @@
 	roll	$16,r1 ## E;		\
 	shrl	$16,r3 ## E;		\
 	xorl	TAB(,r7,4),r5 ## E;	\
-	movzbl	r3 ## H,r7 ## E;	\
-	movzbl	r3 ## L,r3 ## E;	\
-	xorl	TAB+3072(,r7,4),r4 ## E;\
-	xorl	TAB+2048(,r3,4),r5 ## E;\
-	movzbl	r1 ## H,r7 ## E;	\
-	movzbl	r1 ## L,r3 ## E;	\
+	movzbl	r3 ## L,r7 ## E;	\
+	movzbl	r3 ## H,r3 ## E;	\
+	xorl	TAB+3072(,r3,4),r4 ## E;\
+	xorl	TAB+2048(,r7,4),r5 ## E;\
+	movzbl	r1 ## L,r7 ## E;	\
+	movzbl	r1 ## H,r3 ## E;	\
 	shrl	$16,r1 ## E;		\
-	xorl	TAB+3072(,r7,4),r6 ## E;\
-	movl	TAB+2048(,r3,4),r3 ## E;\
-	movzbl	r1 ## H,r7 ## E;	\
-	movzbl	r1 ## L,r1 ## E;	\
-	xorl	TAB+1024(,r7,4),r6 ## E;\
-	xorl	TAB(,r1,4),r3 ## E;	\
+	xorl	TAB+3072(,r3,4),r6 ## E;\
+	movl	TAB+2048(,r7,4),r3 ## E;\
+	movzbl	r1 ## L,r7 ## E;	\
+	movzbl	r1 ## H,r1 ## E;	\
+	xorl	TAB+1024(,r1,4),r6 ## E;\
+	xorl	TAB(,r7,4),r3 ## E;	\
 	movzbl	r2 ## H,r1 ## E;	\
 	movzbl	r2 ## L,r7 ## E;	\
 	shrl	$16,r2 ## E;		\
@@ -131,9 +128,9 @@
 	movl	r4 ## E,r2 ## E;
 
 #define entry(FUNC,KEY,B128,B192) \
-	prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
+	prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11)
 
-#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
+#define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11)
 
 #define encrypt_round(TAB,OFFSET) \
 	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] crypto: x86/aes - Don't use %rbp as temporary register
  2017-05-17  4:03 [PATCH] crypto: x86/aes - Don't use %rbp as temporary register Eric Biggers
@ 2017-05-17 20:44 ` Josh Poimboeuf
  2017-05-17 22:21   ` Eric Biggers
  2017-05-23  5:01 ` Herbert Xu
  1 sibling, 1 reply; 6+ messages in thread
From: Josh Poimboeuf @ 2017-05-17 20:44 UTC (permalink / raw)
  To: Eric Biggers
  Cc: linux-crypto, Herbert Xu, David S . Miller, x86, linux-kernel,
	Eric Biggers

On Tue, May 16, 2017 at 09:03:08PM -0700, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> When using the "aes-asm" implementation of AES (*not* the AES-NI
> implementation) on an x86_64, v4.12-rc1 kernel with lockdep enabled, the
> following warning was reported, along with a long unwinder dump:
> 
> 	WARNING: kernel stack regs at ffffc90000643558 in kworker/u4:2:155 has bad 'bp' value 000000000000001c
> 
> The problem is that aes_enc_block() and aes_dec_block() use %rbp as a
> temporary register, which breaks stack traces if an interrupt occurs.
> 
> Fix this by replacing %rbp with %r9, which was being used to hold the
> saved value of %rbp.  This required rearranging the AES round macro
> slightly since %r9d cannot be used as the target of a move from %ah-%dh.
> 
> Performance is essentially unchanged --- actually about 0.2% faster than
> before.  Interestingly, I also measured aes-generic as being nearly 7%
> faster than aes-asm, so perhaps aes-asm has outlived its usefulness...
> 
> Signed-off-by: Eric Biggers <ebiggers@google.com>

Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>

-- 
Josh

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] crypto: x86/aes - Don't use %rbp as temporary register
  2017-05-17 20:44 ` Josh Poimboeuf
@ 2017-05-17 22:21   ` Eric Biggers
  2017-05-19  1:56     ` Josh Poimboeuf
  0 siblings, 1 reply; 6+ messages in thread
From: Eric Biggers @ 2017-05-17 22:21 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: linux-crypto, Herbert Xu, David S . Miller, x86, linux-kernel,
	Eric Biggers

On Wed, May 17, 2017 at 03:44:27PM -0500, Josh Poimboeuf wrote:
> On Tue, May 16, 2017 at 09:03:08PM -0700, Eric Biggers wrote:
> > From: Eric Biggers <ebiggers@google.com>
> > 
> > When using the "aes-asm" implementation of AES (*not* the AES-NI
> > implementation) on an x86_64, v4.12-rc1 kernel with lockdep enabled, the
> > following warning was reported, along with a long unwinder dump:
> > 
> > 	WARNING: kernel stack regs at ffffc90000643558 in kworker/u4:2:155 has bad 'bp' value 000000000000001c
> > 
> > The problem is that aes_enc_block() and aes_dec_block() use %rbp as a
> > temporary register, which breaks stack traces if an interrupt occurs.
> > 
> > Fix this by replacing %rbp with %r9, which was being used to hold the
> > saved value of %rbp.  This required rearranging the AES round macro
> > slightly since %r9d cannot be used as the target of a move from %ah-%dh.
> > 
> > Performance is essentially unchanged --- actually about 0.2% faster than
> > before.  Interestingly, I also measured aes-generic as being nearly 7%
> > faster than aes-asm, so perhaps aes-asm has outlived its usefulness...
> > 
> > Signed-off-by: Eric Biggers <ebiggers@google.com>
> 
> Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
> 

Hmm, it looks like a number of other algorithms in arch/x86/crypto/ use %rbp (or
%ebp), e.g. blowfish, camellia, cast5, and aes-i586.  Presumably they have the
same problem.  I'm a little confused: do these all need to be fixed, and
when/why did this start being considered broken?

Eric

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] crypto: x86/aes - Don't use %rbp as temporary register
  2017-05-17 22:21   ` Eric Biggers
@ 2017-05-19  1:56     ` Josh Poimboeuf
  2017-05-19  2:50       ` Eric Biggers
  0 siblings, 1 reply; 6+ messages in thread
From: Josh Poimboeuf @ 2017-05-19  1:56 UTC (permalink / raw)
  To: Eric Biggers
  Cc: linux-crypto, Herbert Xu, David S . Miller, x86, linux-kernel,
	Eric Biggers

On Wed, May 17, 2017 at 03:21:41PM -0700, Eric Biggers wrote:
> On Wed, May 17, 2017 at 03:44:27PM -0500, Josh Poimboeuf wrote:
> > On Tue, May 16, 2017 at 09:03:08PM -0700, Eric Biggers wrote:
> > > From: Eric Biggers <ebiggers@google.com>
> > > 
> > > When using the "aes-asm" implementation of AES (*not* the AES-NI
> > > implementation) on an x86_64, v4.12-rc1 kernel with lockdep enabled, the
> > > following warning was reported, along with a long unwinder dump:
> > > 
> > > 	WARNING: kernel stack regs at ffffc90000643558 in kworker/u4:2:155 has bad 'bp' value 000000000000001c
> > > 
> > > The problem is that aes_enc_block() and aes_dec_block() use %rbp as a
> > > temporary register, which breaks stack traces if an interrupt occurs.
> > > 
> > > Fix this by replacing %rbp with %r9, which was being used to hold the
> > > saved value of %rbp.  This required rearranging the AES round macro
> > > slightly since %r9d cannot be used as the target of a move from %ah-%dh.
> > > 
> > > Performance is essentially unchanged --- actually about 0.2% faster than
> > > before.  Interestingly, I also measured aes-generic as being nearly 7%
> > > faster than aes-asm, so perhaps aes-asm has outlived its usefulness...
> > > 
> > > Signed-off-by: Eric Biggers <ebiggers@google.com>
> > 
> > Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
> > 
> 
> Hmm, it looks like a number of other algorithms in arch/x86/crypto/ use %rbp (or
> %ebp), e.g. blowfish, camellia, cast5, and aes-i586.  Presumably they have the
> same problem.  I'm a little confused: do these all need to be fixed, and
> when/why did this start being considered broken?

This warning was only recently added, with the goal of flushing out
these types of issues with hand-coded asm to make frame pointer based
stack traces more reliable.  I can take a look at fixing the rest of
them if you want.

-- 
Josh

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] crypto: x86/aes - Don't use %rbp as temporary register
  2017-05-19  1:56     ` Josh Poimboeuf
@ 2017-05-19  2:50       ` Eric Biggers
  0 siblings, 0 replies; 6+ messages in thread
From: Eric Biggers @ 2017-05-19  2:50 UTC (permalink / raw)
  To: Josh Poimboeuf
  Cc: linux-crypto, Herbert Xu, David S . Miller, x86, linux-kernel,
	Eric Biggers

On Thu, May 18, 2017 at 08:56:32PM -0500, Josh Poimboeuf wrote:
> > 
> > Hmm, it looks like a number of other algorithms in arch/x86/crypto/ use %rbp (or
> > %ebp), e.g. blowfish, camellia, cast5, and aes-i586.  Presumably they have the
> > same problem.  I'm a little confused: do these all need to be fixed, and
> > when/why did this start being considered broken?
> 
> This warning was only recently added, with the goal of flushing out
> these types of issues with hand-coded asm to make frame pointer based
> stack traces more reliable.  I can take a look at fixing the rest of
> them if you want.
> 

Okay.  I'm worried you might run into one that is difficult to fix due to lack
of spare registers.  But if you're interested in looking into it, please go
ahead.  Thanks!

Eric

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] crypto: x86/aes - Don't use %rbp as temporary register
  2017-05-17  4:03 [PATCH] crypto: x86/aes - Don't use %rbp as temporary register Eric Biggers
  2017-05-17 20:44 ` Josh Poimboeuf
@ 2017-05-23  5:01 ` Herbert Xu
  1 sibling, 0 replies; 6+ messages in thread
From: Herbert Xu @ 2017-05-23  5:01 UTC (permalink / raw)
  To: Eric Biggers
  Cc: linux-crypto, David S . Miller, x86, linux-kernel, Josh Poimboeuf,
	Eric Biggers

On Tue, May 16, 2017 at 09:03:08PM -0700, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> When using the "aes-asm" implementation of AES (*not* the AES-NI
> implementation) on an x86_64, v4.12-rc1 kernel with lockdep enabled, the
> following warning was reported, along with a long unwinder dump:
> 
> 	WARNING: kernel stack regs at ffffc90000643558 in kworker/u4:2:155 has bad 'bp' value 000000000000001c
> 
> The problem is that aes_enc_block() and aes_dec_block() use %rbp as a
> temporary register, which breaks stack traces if an interrupt occurs.
> 
> Fix this by replacing %rbp with %r9, which was being used to hold the
> saved value of %rbp.  This required rearranging the AES round macro
> slightly since %r9d cannot be used as the target of a move from %ah-%dh.
> 
> Performance is essentially unchanged --- actually about 0.2% faster than
> before.  Interestingly, I also measured aes-generic as being nearly 7%
> faster than aes-asm, so perhaps aes-asm has outlived its usefulness...
> 
> Signed-off-by: Eric Biggers <ebiggers@google.com>

Patch applied.  Thanks.
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-05-23  5:01 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-17  4:03 [PATCH] crypto: x86/aes - Don't use %rbp as temporary register Eric Biggers
2017-05-17 20:44 ` Josh Poimboeuf
2017-05-17 22:21   ` Eric Biggers
2017-05-19  1:56     ` Josh Poimboeuf
2017-05-19  2:50       ` Eric Biggers
2017-05-23  5:01 ` Herbert Xu

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).