From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753375AbcBWJDg (ORCPT ); Tue, 23 Feb 2016 04:03:36 -0500 Received: from terminus.zytor.com ([198.137.202.10]:57160 "EHLO terminus.zytor.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751198AbcBWJD0 (ORCPT ); Tue, 23 Feb 2016 04:03:26 -0500 Date: Tue, 23 Feb 2016 00:59:00 -0800 From: tip-bot for Josh Poimboeuf <tipbot@zytor.com> Message-ID: Cc: luto@kernel.org, akpm@linux-foundation.org, acme@kernel.org, brgerst@gmail.com, namhyung@gmail.com, jpoimboe@redhat.com, luto@amacapital.net, palves@redhat.com, mmarek@suse.cz, bp@alien8.de, dvlasenk@redhat.com, peterz@infradead.org, mingo@kernel.org, torvalds@linux-foundation.org, linux-kernel@vger.kernel.org, tglx@linutronix.de, bernd@petrovitsch.priv.at, jslaby@suse.cz, hpa@zytor.com, chris.j.arges@canonical.com Reply-To: jslaby@suse.cz, chris.j.arges@canonical.com, hpa@zytor.com, bernd@petrovitsch.priv.at, tglx@linutronix.de, linux-kernel@vger.kernel.org, torvalds@linux-foundation.org, mingo@kernel.org, peterz@infradead.org, dvlasenk@redhat.com, bp@alien8.de, mmarek@suse.cz, palves@redhat.com, jpoimboe@redhat.com, luto@amacapital.net, namhyung@gmail.com, brgerst@gmail.com, acme@kernel.org, akpm@linux-foundation.org, luto@kernel.org In-Reply-To: <9402e4d87580d6b2376ed95f67b84bdcce3c830e.1453405861.git.jpoimboe@redhat.com> References: <9402e4d87580d6b2376ed95f67b84bdcce3c830e.1453405861.git.jpoimboe@redhat.com> To: linux-tip-commits@vger.kernel.org Subject: [tip:x86/debug] x86/asm/crypto: Simplify stack usage in sha-mb functions Git-Commit-ID: c520f61ae153d8b28553d5dd0ea698e0968939d7 X-Mailer: tip-git-log-daemon Robot-ID: <tip-bot.git.kernel.org> Robot-Unsubscribe: 
Contact <mailto:hpa@kernel.org> to get blacklisted from these emails MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset=UTF-8 Content-Disposition: inline Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Commit-ID: c520f61ae153d8b28553d5dd0ea698e0968939d7 Gitweb: http://git.kernel.org/tip/c520f61ae153d8b28553d5dd0ea698e0968939d7 Author: Josh Poimboeuf AuthorDate: Thu, 21 Jan 2016 16:49:17 -0600 Committer: Ingo Molnar CommitDate: Tue, 23 Feb 2016 09:03:56 +0100 x86/asm/crypto: Simplify stack usage in sha-mb functions sha1_mb_mgr_flush_avx2() and sha1_mb_mgr_submit_avx2() both allocate a lot of stack space which is never used. Also, many of the registers being saved aren't being clobbered so there's no need to save them. Signed-off-by: Josh Poimboeuf Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Bernd Petrovitsch Cc: Borislav Petkov Cc: Brian Gerst Cc: Chris J Arges Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Jiri Slaby Cc: Linus Torvalds Cc: Michal Marek Cc: Namhyung Kim Cc: Pedro Alves Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: live-patching@vger.kernel.org Link: http://lkml.kernel.org/r/9402e4d87580d6b2376ed95f67b84bdcce3c830e.1453405861.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S | 32 ++---------------------- arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S | 29 +++------------------ 2 files changed, 6 insertions(+), 55 deletions(-) diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S index 85c4e1c..672eaeb 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_flush_avx2.S @@ -86,16 +86,6 @@ #define extra_blocks %arg2 #define p %arg2 - -# STACK_SPACE needs to be an odd multiple of 8 -_XMM_SAVE_SIZE = 10*16 -_GPR_SAVE_SIZE = 8*8 -_ALIGN_SIZE = 8 - -_XMM_SAVE = 0 -_GPR_SAVE = _XMM_SAVE + _XMM_SAVE_SIZE -STACK_SPACE = _GPR_SAVE + _GPR_SAVE_SIZE + _ALIGN_SIZE - .macro LABEL prefix n \prefix\n\(): .endm @@ -113,16 +103,7 @@ offset = \_offset # JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state) # arg 1 : rcx : state ENTRY(sha1_mb_mgr_flush_avx2) - mov %rsp, %r10 - sub $STACK_SPACE, %rsp - and $~31, %rsp - mov %rbx, _GPR_SAVE(%rsp) - mov %r10, _GPR_SAVE+8*1(%rsp) #save rsp - mov %rbp, _GPR_SAVE+8*3(%rsp) - mov %r12, _GPR_SAVE+8*4(%rsp) - mov %r13, _GPR_SAVE+8*5(%rsp) - mov %r14, _GPR_SAVE+8*6(%rsp) - mov %r15, _GPR_SAVE+8*7(%rsp) + push %rbx # If bit (32+3) is set, then all lanes are empty mov _unused_lanes(state), unused_lanes @@ -230,16 +211,7 @@ len_is_0: mov tmp2_w, offset(job_rax) return: - - mov _GPR_SAVE(%rsp), %rbx - mov _GPR_SAVE+8*1(%rsp), %r10 #saved rsp - mov _GPR_SAVE+8*3(%rsp), %rbp - mov _GPR_SAVE+8*4(%rsp), %r12 - mov _GPR_SAVE+8*5(%rsp), %r13 - mov _GPR_SAVE+8*6(%rsp), %r14 - mov _GPR_SAVE+8*7(%rsp), %r15 - mov %r10, %rsp - + pop %rbx ret return_null: diff --git 
a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S index 2ab9560..a5a14c6 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_submit_avx2.S @@ -94,25 +94,12 @@ DWORD_tmp = %r9d lane_data = %r10 -# STACK_SPACE needs to be an odd multiple of 8 -STACK_SPACE = 8*8 + 16*10 + 8 - # JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job) # arg 1 : rcx : state # arg 2 : rdx : job ENTRY(sha1_mb_mgr_submit_avx2) - - mov %rsp, %r10 - sub $STACK_SPACE, %rsp - and $~31, %rsp - - mov %rbx, (%rsp) - mov %r10, 8*2(%rsp) #save old rsp - mov %rbp, 8*3(%rsp) - mov %r12, 8*4(%rsp) - mov %r13, 8*5(%rsp) - mov %r14, 8*6(%rsp) - mov %r15, 8*7(%rsp) + push %rbx + push %rbp mov _unused_lanes(state), unused_lanes mov unused_lanes, lane @@ -203,16 +190,8 @@ len_is_0: movl DWORD_tmp, _result_digest+1*16(job_rax) return: - - mov (%rsp), %rbx - mov 8*2(%rsp), %r10 #save old rsp - mov 8*3(%rsp), %rbp - mov 8*4(%rsp), %r12 - mov 8*5(%rsp), %r13 - mov 8*6(%rsp), %r14 - mov 8*7(%rsp), %r15 - mov %r10, %rsp - + pop %rbp + pop %rbx ret return_null: