From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, syzbot <syzkaller@googlegroups.com>,
Eric Biggers <ebiggers@google.com>,
Josh Poimboeuf <jpoimboe@redhat.com>,
Herbert Xu <herbert@gondor.apana.org.au>
Subject: [PATCH 3.18 21/58] crypto: x86/twofish-3way - Fix %rbp usage
Date: Fri, 23 Feb 2018 19:26:20 +0100 [thread overview]
Message-ID: <20180223170210.216320862@linuxfoundation.org> (raw)
In-Reply-To: <20180223170206.724655284@linuxfoundation.org>
3.18-stable review patch. If anyone has any objections, please let me know.
------------------
From: Eric Biggers <ebiggers@google.com>
commit d8c7fe9f2a486a6e5f0d5229ca43807af5ab22c6 upstream.
Using %rbp as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.
In twofish-3way, we can't simply replace %rbp with another register
because there are none available. Instead, we use the stack to hold the
values that %rbp, %r11, and %r12 were holding previously. Each of these
values represents the half of the output from the previous Feistel round
that is being passed on unchanged to the following round. They are only
used once per round, when they are exchanged with %rax, %rbx, and %rcx.
As a result, we free up 3 registers (one per block) and can reassign
them so that %rbp is not used, and additionally %r14 and %r15 are not
used so they do not need to be saved/restored.
There may be a small overhead caused by replacing 'xchg REG, REG' with
the needed sequence 'mov MEM, REG; mov REG, MEM; mov REG, REG' once per
round. But, counterintuitively, when I tested "ctr-twofish-3way" on a
Haswell processor, the new version was actually about 2% faster.
(Perhaps 'xchg' is not as well optimized as plain moves.)
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 118 ++++++++++++++-------------
1 file changed, 63 insertions(+), 55 deletions(-)
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -55,29 +55,31 @@
#define RAB1bl %bl
#define RAB2bl %cl
+#define CD0 0x0(%rsp)
+#define CD1 0x8(%rsp)
+#define CD2 0x10(%rsp)
+
+# used only before/after all rounds
#define RCD0 %r8
#define RCD1 %r9
#define RCD2 %r10
-#define RCD0d %r8d
-#define RCD1d %r9d
-#define RCD2d %r10d
-
-#define RX0 %rbp
-#define RX1 %r11
-#define RX2 %r12
-
-#define RX0d %ebp
-#define RX1d %r11d
-#define RX2d %r12d
-
-#define RY0 %r13
-#define RY1 %r14
-#define RY2 %r15
-
-#define RY0d %r13d
-#define RY1d %r14d
-#define RY2d %r15d
+# used only during rounds
+#define RX0 %r8
+#define RX1 %r9
+#define RX2 %r10
+
+#define RX0d %r8d
+#define RX1d %r9d
+#define RX2d %r10d
+
+#define RY0 %r11
+#define RY1 %r12
+#define RY2 %r13
+
+#define RY0d %r11d
+#define RY1d %r12d
+#define RY2d %r13d
#define RT0 %rdx
#define RT1 %rsi
@@ -85,6 +87,8 @@
#define RT0d %edx
#define RT1d %esi
+#define RT1bl %sil
+
#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
movzbl ab ## bl, tmp2 ## d; \
movzbl ab ## bh, tmp1 ## d; \
@@ -92,6 +96,11 @@
op1##l T0(CTX, tmp2, 4), dst ## d; \
op2##l T1(CTX, tmp1, 4), dst ## d;
+#define swap_ab_with_cd(ab, cd, tmp) \
+ movq cd, tmp; \
+ movq ab, cd; \
+ movq tmp, ab;
+
/*
* Combined G1 & G2 function. Reordered with help of rotates to have moves
* at begining.
@@ -110,15 +119,15 @@
/* G1,2 && G2,2 */ \
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
- xchgq cd ## 0, ab ## 0; \
+ swap_ab_with_cd(ab ## 0, cd ## 0, RT0); \
\
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
- xchgq cd ## 1, ab ## 1; \
+ swap_ab_with_cd(ab ## 1, cd ## 1, RT0); \
\
do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
- xchgq cd ## 2, ab ## 2;
+ swap_ab_with_cd(ab ## 2, cd ## 2, RT0);
#define enc_round_end(ab, x, y, n) \
addl y ## d, x ## d; \
@@ -168,6 +177,16 @@
decrypt_round3(ba, dc, (n*2)+1); \
decrypt_round3(ba, dc, (n*2));
+#define push_cd() \
+ pushq RCD2; \
+ pushq RCD1; \
+ pushq RCD0;
+
+#define pop_cd() \
+ popq RCD0; \
+ popq RCD1; \
+ popq RCD2;
+
#define inpack3(in, n, xy, m) \
movq 4*(n)(in), xy ## 0; \
xorq w+4*m(CTX), xy ## 0; \
@@ -223,11 +242,8 @@ ENTRY(__twofish_enc_blk_3way)
* %rdx: src, RIO
* %rcx: bool, if true: xor output
*/
- pushq %r15;
- pushq %r14;
pushq %r13;
pushq %r12;
- pushq %rbp;
pushq %rbx;
pushq %rcx; /* bool xor */
@@ -235,40 +251,36 @@ ENTRY(__twofish_enc_blk_3way)
inpack_enc3();
- encrypt_cycle3(RAB, RCD, 0);
- encrypt_cycle3(RAB, RCD, 1);
- encrypt_cycle3(RAB, RCD, 2);
- encrypt_cycle3(RAB, RCD, 3);
- encrypt_cycle3(RAB, RCD, 4);
- encrypt_cycle3(RAB, RCD, 5);
- encrypt_cycle3(RAB, RCD, 6);
- encrypt_cycle3(RAB, RCD, 7);
+ push_cd();
+ encrypt_cycle3(RAB, CD, 0);
+ encrypt_cycle3(RAB, CD, 1);
+ encrypt_cycle3(RAB, CD, 2);
+ encrypt_cycle3(RAB, CD, 3);
+ encrypt_cycle3(RAB, CD, 4);
+ encrypt_cycle3(RAB, CD, 5);
+ encrypt_cycle3(RAB, CD, 6);
+ encrypt_cycle3(RAB, CD, 7);
+ pop_cd();
popq RIO; /* dst */
- popq %rbp; /* bool xor */
+ popq RT1; /* bool xor */
- testb %bpl, %bpl;
+ testb RT1bl, RT1bl;
jnz .L__enc_xor3;
outunpack_enc3(mov);
popq %rbx;
- popq %rbp;
popq %r12;
popq %r13;
- popq %r14;
- popq %r15;
ret;
.L__enc_xor3:
outunpack_enc3(xor);
popq %rbx;
- popq %rbp;
popq %r12;
popq %r13;
- popq %r14;
- popq %r15;
ret;
ENDPROC(__twofish_enc_blk_3way)
@@ -278,35 +290,31 @@ ENTRY(twofish_dec_blk_3way)
* %rsi: dst
* %rdx: src, RIO
*/
- pushq %r15;
- pushq %r14;
pushq %r13;
pushq %r12;
- pushq %rbp;
pushq %rbx;
pushq %rsi; /* dst */
inpack_dec3();
- decrypt_cycle3(RAB, RCD, 7);
- decrypt_cycle3(RAB, RCD, 6);
- decrypt_cycle3(RAB, RCD, 5);
- decrypt_cycle3(RAB, RCD, 4);
- decrypt_cycle3(RAB, RCD, 3);
- decrypt_cycle3(RAB, RCD, 2);
- decrypt_cycle3(RAB, RCD, 1);
- decrypt_cycle3(RAB, RCD, 0);
+ push_cd();
+ decrypt_cycle3(RAB, CD, 7);
+ decrypt_cycle3(RAB, CD, 6);
+ decrypt_cycle3(RAB, CD, 5);
+ decrypt_cycle3(RAB, CD, 4);
+ decrypt_cycle3(RAB, CD, 3);
+ decrypt_cycle3(RAB, CD, 2);
+ decrypt_cycle3(RAB, CD, 1);
+ decrypt_cycle3(RAB, CD, 0);
+ pop_cd();
popq RIO; /* dst */
outunpack_dec3();
popq %rbx;
- popq %rbp;
popq %r12;
popq %r13;
- popq %r14;
- popq %r15;
ret;
ENDPROC(twofish_dec_blk_3way)
next prev parent reply other threads:[~2018-02-23 18:30 UTC|newest]
Thread overview: 58+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-02-23 18:25 [PATCH 3.18 00/58] 3.18.96-stable review Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 01/58] IB/mlx4: Fix incorrectly releasing steerable UD QPs when have only ETH ports Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 02/58] PM / devfreq: Propagate error from devfreq_add_device() Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 03/58] s390: fix handling of -1 in set{,fs}[gu]id16 syscalls Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 04/58] arm: spear600: Add missing interrupt-parent of rtc Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 05/58] arm: spear13xx: Fix dmas cells Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 06/58] arm: spear13xx: Fix spics gpio controllers warning Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 07/58] ALSA: seq: Fix regression by incorrect ioctl_mutex usages Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 09/58] ext4: save error to disk in __ext4_grp_locked_error() Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 11/58] video: fbdev: atmel_lcdfb: fix display-timings lookup Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 12/58] console/dummy: leave .con_font_get set to NULL Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 13/58] Btrfs: fix deadlock in run_delalloc_nocow Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 14/58] Btrfs: fix crash due to not cleaning up tree log blocks dirty bits Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 16/58] ARM: dts: s5pv210: add interrupt-parent for ohci Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 17/58] media: r820t: fix r820t_write_reg for KASAN Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 18/58] mm,vmscan: Make unregister_shrinker() no-op if register_shrinker() failed Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 19/58] xfrm: check id proto in validate_tmpl() Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 20/58] selinux: skip bounded transition processing if the policy isnt loaded Greg Kroah-Hartman
2018-02-23 18:26 ` Greg Kroah-Hartman [this message]
2018-02-23 18:26 ` [PATCH 3.18 22/58] staging: android: ion: Add __GFP_NOWARN for system contig heap Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 23/58] netfilter: x_tables: fix int overflow in xt_alloc_table_info() Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 24/58] netfilter: x_tables: avoid out-of-bounds reads in xt_request_find_{match|target} Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 25/58] netfilter: ipt_CLUSTERIP: fix out-of-bounds accesses in clusterip_tg_check() Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 26/58] netfilter: on sockopt() acquire sock lock only in the required scope Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 27/58] netfilter: xt_RATEEST: acquire xt_rateest_mutex for hash insert Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 28/58] net: avoid skb_warn_bad_offload on IS_ERR Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 29/58] Provide a function to create a NUL-terminated string from unterminated data Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 30/58] selinux: ensure the context is NUL terminated in security_context_to_sid_core() Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 31/58] ASoC: ux500: add MODULE_LICENSE tag Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 32/58] video: fbdev/mmp: add MODULE_LICENSE Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 33/58] dn_getsockoptdecnet: move nf_{get/set}sockopt outside sock lock Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 34/58] usbip: keep usbip_device sockfd state in sync with tcp_socket Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 35/58] usb: build drivers/usb/common/ when USB_SUPPORT is set Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 36/58] ARM: AM33xx: PRM: Remove am33xx_pwrdm_read_prev_pwrst function Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 37/58] ARM: dts: am4372: Correct the interrupts_properties of McASP Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 38/58] perf top: Fix window dimensions change handling Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 39/58] perf bench numa: Fixup discontiguous/sparse numa nodes Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 40/58] media: s5k6aa: describe some function parameters Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 41/58] scripts/kernel-doc: Dont fail with status != 0 if error encountered with -none Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 42/58] m68k: add missing SOFTIRQENTRY_TEXT linker section Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 43/58] powerpc/perf: Fix oops when grouping different pmu events Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 44/58] s390/dasd: prevent prefix I/O error Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 45/58] gianfar: fix a flooded alignment reports because of padding issue Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 46/58] net_sched: red: Avoid devision by zero Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 47/58] net_sched: red: Avoid illegal values Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 48/58] btrfs: Fix possible off-by-one in btrfs_search_path_in_tree Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 49/58] 509: fix printing uninitialized stack memory when OID is empty Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 50/58] spi: sun4i: disable clocks in the remove function Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 51/58] xfrm: Fix stack-out-of-bounds with misconfigured transport mode policies Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 52/58] dmaengine: jz4740: disable/unprepare clk if probe fails Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 53/58] mm/early_ioremap: Fix boot hang with earlyprintk=efi,keep Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 54/58] x86/mm/kmmio: Fix mmiotrace for page unaligned addresses Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 55/58] xen: XEN_ACPI_PROCESSOR is Dom0-only Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 56/58] hippi: Fix a Fix a possible sleep-in-atomic bug in rr_close Greg Kroah-Hartman
2018-02-23 18:26 ` [PATCH 3.18 58/58] crypto: s5p-sss - Fix kernel Oops in AES-ECB mode Greg Kroah-Hartman
2018-02-24 0:37 ` [PATCH 3.18 00/58] 3.18.96-stable review Shuah Khan
2018-02-24 17:54 ` Guenter Roeck
[not found] ` <CALpmF+EXvmUHRD=bKEKT3fUJYRQ2g48odV4MEbZ2tNRMj7KEbQ@mail.gmail.com>
2018-02-24 18:38 ` Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180223170210.216320862@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=ebiggers@google.com \
--cc=herbert@gondor.apana.org.au \
--cc=jpoimboe@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=stable@vger.kernel.org \
--cc=syzkaller@googlegroups.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).